def _url_and_destination(self, base_url, unit):
    """
    Get the download URL and download destination.
    :param base_url: The base URL.
    :type base_url: str
    :param unit: A content unit.
    :type unit: dict
    :return: (url, destination)
    :rtype: tuple(2)
    """
    # NOTE(review): `pathlib` here appears to be a project URL/path helper
    # module (it provides join/quote/url_join), not the stdlib pathlib —
    # confirm against the file's imports.
    storage_path = unit[constants.STORAGE_PATH]
    tar_path = unit.get(constants.TARBALL_PATH)
    if not tar_path:
        # The pulp/nodes/content endpoint provides all content.
        # This replaced the publishing of individual links for each unit.
        parsed = urlparse(base_url)
        relative_path = unit[constants.RELATIVE_PATH]
        # URL-quote the unit's relative path under the content endpoint.
        path = pathlib.join(constants.CONTENT_PATH, pathlib.quote(relative_path))
        # Rebuild the URL, keeping everything from base_url except the path.
        base_url = ParseResult(
            scheme=parsed.scheme,
            netloc=parsed.netloc,
            path=path,
            params=parsed.params,
            query=parsed.query,
            fragment=parsed.fragment)
        return base_url.geturl(), storage_path
    else:
        # Tarball units: fetch the quoted tarball URL and store it next to
        # the unit's storage path, preserving the tarball's basename.
        return pathlib.url_join(base_url, pathlib.quote(tar_path)),\
            pathlib.join(os.path.dirname(storage_path), os.path.basename(tar_path))
def build_url(host, scheme=None, port=None):
    """
    Build a valid URL. IPv6 addresses specified in host will be enclosed
    in brackets automatically.

    >>> build_url('example.com', 'https', 443)
    'https://example.com:443'
    >>> build_url(host='example.com', port=443)
    '//example.com:443'
    >>> build_url('fce:9af7:a667:7286:4917:b8d3:34df:8373', port=80, scheme='http')
    'http://[fce:9af7:a667:7286:4917:b8d3:34df:8373]:80'

    :param scheme: The scheme, e.g. http, https or ftp.
    :type scheme: str
    :param host: Consisting of either a registered name (including but
                 not limited to a hostname) or an IP address.
    :type host: str
    :type port: int
    :rtype: str
    """
    # IPv6 literals must be bracketed inside a netloc.
    if is_valid_ipv6_address(host):
        location = '[{}]'.format(host)
    else:
        location = host
    if port:
        location = '{}:{}'.format(location, port)
    result = ParseResult(scheme=scheme, netloc=location, path='',
                         params='', query='', fragment='')
    return result.geturl()
def _get_real_path(self, request):
    """
    return the URI needed for further processing.

    This is either a relative path if the request is to be
    processed internally or a full URI if the request is retargeted

    :param request: the incoming WSGI request wrapper (has .environ).
    :return: a relative path (internal) or an absolute URI (retargeted).
    """
    # TODO: kca: deal with more complex host headers
    environ = request.environ
    path = environ["PATH_INFO"]
    if path.startswith('/'):
        # Plain relative request line — always internal.
        return path
    parsed = urlparse(path)
    host_header = environ.get("HTTP_HOST")
    self.logger.debug("Host header: %s", host_header)
    # prefer the HOST header
    netloc = host_header or parsed.netloc
    if not netloc:
        # no absolute URI and no host header -> not retargeted
        return path
    if netloc[0] == "[":
        # Bracketed IPv6 netloc, e.g. "[::1]:8080".
        target_host, _, target_port = netloc[1:].partition(']')
        if target_port:
            target_port = target_port[1:]
    else:
        target_host, _, target_port = netloc.partition(":")
    if not target_port:
        # Default port by scheme (5000 is this server's default).
        target_port = parsed.scheme.lower() == "https" and "443" or "5000"
    target = (target_host, target_port)
    if target in self.__cache:
        # Already known to be local: strip scheme/netloc.
        return urlunparse(ParseResult("", "", *parsed[2:]))
    if (target_port == environ["SERVER_PORT"] and
            (self._resolve_host(target_host) & self.__addresses or
             target_host in self.__valid_hostnames)):
        # port and host match -> not retargeted
        self.__cache.add(target)
        return urlunparse(ParseResult("", "", *parsed[2:]))
    # request is retargeted
    #
    # construct full URI if needed
    if not parsed.netloc:
        path = netloc + path
        if not parsed.scheme:
            path = request.environ['wsgi.url_scheme'] + "://" + path
    elif not parsed.scheme:
        path = "http://" + path
    # BUGFIX: was a stray debug print(path); route through the logger
    # like the rest of this method.
    self.logger.debug("Retargeted URI: %s", path)
    return path
def setUp(self): webapp.app.config['TESTING'] = True # to get better error messages self.app = webapp.app.test_client() webapp.default_theme = 'default' # set some defaults self.test_results = [ { 'content': 'first test content', 'title': 'First Test', 'url': 'http://first.test.xyz', 'engines': ['youtube', 'startpage'], 'engine': 'startpage', 'parsed_url': ParseResult(scheme='http', netloc='first.test.xyz', path='/', params='', query='', fragment=''), # noqa }, { 'content': 'second test content', 'title': 'Second Test', 'url': 'http://second.test.xyz', 'engines': ['youtube', 'startpage'], 'engine': 'youtube', 'parsed_url': ParseResult(scheme='http', netloc='second.test.xyz', path='/', params='', query='', fragment=''), # noqa }, ] def search_mock(search_self, *args): search_self.result_container = Mock(get_ordered_results=lambda: self.test_results, answers=set(), suggestions=set(), infoboxes=[], results=self.test_results, results_length=lambda: len(self.test_results)) webapp.Search.search = search_mock self.maxDiff = None # to see full diffs
def url_for_first_service_with_name(self, name, scheme=None):
    """
    Build a connection URL for the first bound service with the given name.

    :param name: the service name to look up via the credentials locator.
    :param scheme: optional URL scheme (e.g. 'postgres'); empty if omitted.
    :return: the service's 'uri' credential verbatim if present, otherwise
        a URL assembled as ``scheme://user:pass@host:port``; None when no
        credentials are found.
    """
    locator = self.credentials_locator
    find = locator.find_credentials_for_first_service_with_name
    credentials = find(name)
    if not credentials:
        return None
    if 'uri' in credentials:
        # A ready-made URI wins over assembling one from parts.
        return credentials['uri']
    scheme = scheme or ''
    username = credentials.get('username', None)
    hostname = credentials.get('hostname', None)
    password = credentials.get('password', None)
    username_password = ''
    if username or password:
        # BUGFIX: the format string had been corrupted to "******", which
        # raises TypeError when %-formatted; restore the standard
        # "user:password@" userinfo prefix.
        username_password = '%s:%s@' % (username or '', password or '')
    user_pass_and_hostname = '%s%s' % (username_password, hostname)
    port = credentials.get('port', None)
    port = '' if not port else ':%s' % port
    netloc = '%s%s' % (user_pass_and_hostname, port)
    parse_result = ParseResult(scheme=scheme, netloc=netloc, path='',
                               params='', query='', fragment='')
    return parse_result.geturl()
def munge_url(url):
    """Normalize a repository URL (bytes).

    - A URL with no scheme — or, on Windows, a one-letter scheme with no
      netloc (a drive letter such as ``c:/foo``) — becomes a ``file`` URL.
    - ``hg://host:port.proto/path`` is rewritten to ``proto://host:port/path``
      (``https`` when no protocol suffix is given).
    - Anything else is returned parsed but otherwise untouched.
    """
    parts = urlparse(url)
    drive_letter = (sys.platform == 'win32' and not parts.netloc
                    and len(parts.scheme) == 1)
    if not parts.scheme or drive_letter:
        if parts.scheme:
            # Re-attach the drive letter that urlparse mistook for a scheme.
            local_path = b'%s:%s' % (parts.scheme, parts.path)
        else:
            local_path = parts.path
        return ParseResult(b'file', b'', local_path,
                           parts.params, parts.query, parts.fragment)
    if parts.scheme != b'hg':
        return parts
    scheme = b'https'
    netloc = parts.netloc
    if b':' in netloc:
        netloc, tail = netloc.rsplit(b':', 1)
        if b'.' in tail:
            # "port.proto" suffix carries the real protocol.
            tail, scheme = tail.split(b'.', 1)
        if not tail.isdigit():
            # No port at all — the whole tail was the protocol.
            scheme, tail = tail, None
        if tail:
            netloc = netloc + b':' + tail
    return ParseResult(scheme, netloc, parts.path,
                       parts.params, parts.query, parts.fragment)
def _buildCookiedSession(self, base_url, referer=u''): parsed = urlparse(base_url) # First request logging(u'Build Cookies Request #1:') res = self._get(parsed.geturl(), headers={ u'Accept': u'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', u'Upgrade-Insecure-Requests': u'1', u'Host': parsed.netloc, }, referer=referer) self.lastReferer = res.url # print res.content + '\n' # Second request to get js content JS = re.search(r'src\=\"\/(ser\-.*\.js)\"', res.content).group(1) url_2 = ParseResult(parsed.scheme, parsed.netloc, JS, u'', u'', u'').geturl() time.sleep(random.random()) logging(u'Build Cookies Request #2:') res = self._get(url_2, referer=self.lastReferer) # print res.content + '\n' # Third request to post js to get cookies PID = re.search(r'FingerprintWrapper\(\{path\:\"\/.*?\?(PID\=.*?)\"\,', res.content).group(1) AJAX = re.search(r'FingerprintWrapper.*?ajax_header\:\"(.*?)\"\,interval', res.content).group(1) url_3 = ParseResult(parsed.scheme, parsed.netloc, JS, '', PID, '').geturl() time.sleep(random.random()) logging(u'Build Cookies Request #3:') res = self._post(url_3, data={ u'p': proof(cfg.p) }, headers={ u'Accept': u'*/*', u'X-Distil-Ajax': AJAX }, referer=self.lastReferer) # print res.__dict__ return res
def url_build(scheme='http', netloc='', path='', params='', query='', fragment=''):
    """Assemble a URL whose host is always the app's SERVER_NAME.

    NOTE(review): the ``netloc`` parameter is accepted but unconditionally
    overwritten with app.config['SERVER_NAME'] — confirm that is intended
    before relying on it.
    """
    with app.app_context():
        netloc=app.config['SERVER_NAME']
        u = ParseResult(scheme=scheme, netloc=netloc, path=path, params=params, query=query, fragment=fragment)
        return u.geturl()
def handleLogin():
    """Flask view: IndieAuth sign-in.

    On form submit, discovers the user's authorization endpoint, stores
    pending-login state in redis (when ``db`` is configured) and redirects
    to that endpoint; otherwise renders the login form.
    """
    app.logger.info('handleLogin [%s]' % request.method)
    me = None
    redirectURI = '%s/success' % cfg.baseurl
    fromURI = request.args.get('from_uri')
    app.logger.info('redirectURI [%s] fromURI [%s]' % (redirectURI, fromURI))
    form = LoginForm(me='', client_id=cfg.client_id, redirect_uri=redirectURI, from_uri=fromURI)
    if form.validate_on_submit():
        app.logger.info('me [%s]' % form.me.data)
        # Normalize the identity to an https URL on its base domain.
        me = 'https://%s/' % baseDomain(form.me.data, includeScheme=False)
        authEndpoints = ninka.indieauth.discoverAuthEndpoints(me)
        if 'authorization_endpoint' in authEndpoints:
            # Take the first discovered endpoint.
            authURL = None
            for url in authEndpoints['authorization_endpoint']:
                authURL = url
                break
            if authURL is not None:
                # Endpoint URL plus the IndieAuth query parameters.
                url = ParseResult(authURL.scheme, authURL.netloc, authURL.path, authURL.params,
                                  urllib.urlencode({'me': me,
                                                    'redirect_uri': form.redirect_uri.data,
                                                    'client_id': form.client_id.data,
                                                    'scope': 'post',
                                                    'response_type': 'id'}),
                                  authURL.fragment).geturl()
                if db is not None:
                    key = 'login-%s' % me
                    data = db.hgetall(key)
                    if data and 'token' in data:
                        # clear any existing auth data
                        db.delete('token-%s' % data['token'])
                        db.hdel(key, 'token')
                    # Persist the pending login parameters for the callback.
                    db.hset(key, 'auth_url', ParseResult(authURL.scheme, authURL.netloc, authURL.path, '', '', '').geturl())
                    db.hset(key, 'from_uri', form.from_uri.data)
                    db.hset(key, 'redirect_uri', form.redirect_uri.data)
                    db.hset(key, 'client_id', form.client_id.data)
                    db.hset(key, 'scope', 'post')
                    db.expire(key, cfg.auth_timeout)  # expire in N minutes unless successful
                app.logger.info('redirecting to [%s]' % url)
                return redirect(url)
            else:
                return 'insert fancy no auth endpoint found error message here', 403
    templateContext = {}
    templateContext['title'] = 'Sign In'
    templateContext['form'] = form
    return render_template('login.jinja', **templateContext)
def pre_resolve_request(self, request):
    """Resolve the request's hostname through the caching resolver and
    rewrite the URL to target the resolved ip:port directly, pinning the
    original hostname in the Host header so virtual hosting still works.
    """
    parsed = urlparse(request.url)
    ip, port = CachingResolver.get(parsed.hostname, parsed.port or 80)
    if parsed.scheme == "https":
        # TLS always goes to 443 regardless of what the resolver returned.
        port = 443
    request.headers["Host"] = parsed.hostname
    request.url = ParseResult(parsed.scheme, "%s:%s" % (ip, port),
                              parsed.path, parsed.params,
                              parsed.query, parsed.fragment).geturl()
    return request
def download_key_http(self, address, port):
    """Fetch a key over plain HTTP from ``address:port`` and return the
    response body as text.

    :param address: IPv4 or IPv6 literal address.
    :param port: TCP port number.
    """
    # Bracket only IPv6 literals, per RFC 3986 ("[::1]:80").  The original
    # used double brackets ("[[...]]"), which is not a valid netloc and
    # also wrapped plain IPv4 addresses in brackets.
    if ':' in address:
        netloc = "[%s]:%d" % (address, port)
    else:
        netloc = "%s:%d" % (address, port)
    url = ParseResult(
        scheme='http',
        netloc=netloc,
        path='/',
        params='',
        query='',
        fragment='')
    return requests.get(url.geturl()).text
def uriWithoutSuffix(self):
    """Return self's URI with params and fragment stripped, keeping
    scheme, netloc, path and query; optionally re-quoted."""
    from urlparse import ParseResult
    uriCopy = ParseResult(scheme=self._parsedUri.scheme,
                          netloc=self._parsedUri.netloc,
                          path=self._parsedUri.path,
                          params='',
                          query=self._parsedUri.query,
                          fragment='')
    # quoteParseResults is a helper defined elsewhere in this project;
    # applied only when this instance was built in unquote mode.
    uriCopy = quoteParseResults(uriCopy) if self._doUnquote else uriCopy
    return uriCopy.geturl()
def authenticate(self, request):
    """
    Check the request for authenticated user. If user is not
    authenticated then redirect user to login view.

    :param request: the incoming Django request.
    :return: HttpResponseForbidden for an unknown service, otherwise a
        redirect back to the service's next/referer/default URL carrying
        either a signed ``ticket`` (authenticated) or an ``ack`` marker.
    """
    next_url = request.GET.get("next", None)
    # Get the service name from request
    service = self._get_service(request)
    if not service:
        return HttpResponseForbidden("Invalid service")
    validator = DefaultValidation(service.key)
    try:
        next_url = urlparse(urllib.unquote(next_url).decode("utf8"))
    except AttributeError:
        # next_url was None: fall back to the referer, then to the
        # service's default URL.
        # BUGFIX: the original read request.META["REFERER"], a key that is
        # never set, right after checking for "HTTP_REFERER" — which is
        # the actual WSGI key — so this branch always raised KeyError.
        if "HTTP_REFERER" in request.META:
            next_url = urlparse(request.META["HTTP_REFERER"])
        else:
            next_url = urlparse(service.default_url)
    # Retreive the referer GET parameters and make a new one
    params = dict(parse_qsl(next_url[4]))
    # Does user authenticated before?
    if request.user.is_authenticated():
        logger.debug("User is authenticated.")
        # If user is authenticated in Daarmaan then a ticket
        # (user session ID) will send back to service
        # IMPORTANT: is using session id of daarmaan as ticket ok?
        ticket = request.session.session_key
        logger.debug("[TICKET]: %s" % ticket)
        params.update({'ticket': ticket,
                       "hash": validator.sign(ticket)})
    else:
        # If user is not authenticated simple ack answer will return
        logger.debug("User is NOT authenticated.")
        params.update({"ack": " "})
    next_url = ParseResult(next_url[0], next_url[1], next_url[2], next_url[3],
                           urllib.urlencode(params), next_url[5])
    next_url = next_url.geturl()
    return HttpResponseRedirect(next_url)
def __call__(self, request):
    """Insert the token after the path element of the URL"""
    parts = urlparse(request.url)
    rebuilt = ParseResult(scheme=parts.scheme,
                          netloc=parts.netloc,
                          path=join(parts.path, self.token),
                          params=parts.params,
                          query=parts.query,
                          fragment=parts.fragment)
    request.url = rebuilt.geturl()
    return request
def should_follow(self, response, spider):
    """Return True unless the response URL — with its query and fragment
    stripped — appears in the spider's disallow list."""
    parts = urlparse(response.url)
    stripped = ParseResult(
        parts.scheme,
        parts.netloc,
        parts.path,
        parts.params,
        None,
        None,
    ).geturl()
    return stripped not in spider.disallow_urls
def download_key_http(self, address, port):
    """Fetch a key over plain HTTP from ``address:port`` and return the
    raw response bytes.

    :param address: IPv4 or IPv6 literal address.
    :param port: TCP port number.
    """
    # Bracket only IPv6 literals, per RFC 3986 ("[::1]:80").  The original
    # used double brackets ("[[...]]"), which is not a valid netloc and
    # also wrapped plain IPv4 addresses in brackets.
    if ':' in address:
        netloc = "[%s]:%d" % (address, port)
    else:
        netloc = "%s:%d" % (address, port)
    url = ParseResult(
        scheme='http',
        netloc=netloc,
        path='/',
        params='',
        query='',
        fragment='')
    self.log.debug("Starting HTTP request")
    data = requests.get(url.geturl(), timeout=5).content
    self.log.debug("finished downloading %d bytes", len(data))
    return data
def handle_authcode(request, client, redirection_uri, state=None):
    """OAuth2 authorization-code grant: mint an authorization code and
    redirect back to the client's redirection URI with ``code`` (and the
    optional ``state``) merged into its query string."""
    parts = urlparse(redirection_uri.uri)
    qparams = dict(parse_qsl(parts.query))
    user_id = authenticated_userid(request)
    auth_code = Oauth2Code(client, user_id, redirection_uri)
    db.add(auth_code)
    # flush so auth_code.authcode is populated by the DB layer.
    db.flush()
    qparams["code"] = auth_code.authcode
    if state:
        qparams["state"] = state
    # Rebuild the URI with the augmented query; the fragment is dropped.
    parts = ParseResult(parts.scheme, parts.netloc, parts.path, parts.params,
                        urlencode(qparams), "")
    return HTTPFound(location=parts.geturl())
def url_add_query(url, **kw):
    """Return *url* with the keyword arguments prepended to its query
    string.

    In python2.6 urlparse parses a url into a ParseResult object while in
    prior version urlparse's result is a tuple of six elements.
    """
    parts = urlparse(url)
    extra = urllib.urlencode(kw)
    # New parameters come first; any existing query is kept after them.
    combined = extra + '&' + parts.query if parts.query else extra
    return ParseResult(parts.scheme, parts.netloc, parts.path,
                       parts.params, combined, parts.fragment).geturl()
def __init__(self, delay=5):
    """Load search queries from ./keywords.csv (columns: keyword, city,
    state) and queue one Manta location-search URL per row.

    :param delay: crawl delay in seconds, forwarded to the base crawler.
    """
    super(SearchUrlCrawler, self).__init__(delay=delay)
    with codecs.open(
            os.path.realpath(os.path.join(os.getcwd(), u'keywords.csv')),
            u'rb', u'utf-8') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        # Maps each generated URL back to its source CSV row.
        self.queries = OrderedDict()
        for row in csvreader:
            # row layout: [keyword, city, state]
            query = OrderedDict([(u'city', row[1]),
                                 (u'state', row[2]),
                                 (u'kw', row[0])])
            parsed = ParseResult(u'https', u'www.manta.com', u'/api/v1/location',
                                 u'', urlencode(query), u'')
            self.queries[parsed.geturl()] = row
            self.crawl_urls.append(parsed.geturl())
def runTest(self):
    """Discover the token endpoint of the local test server and verify
    that a GET without an access token is rejected with HTTP 400."""
    me = 'http://127.0.0.1:9999'
    r = ninka.discoverTokenEndpoints(me)
    assert 'token_endpoint' in r
    # Take the first discovered endpoint.
    tokenURL = None
    for url in r['token_endpoint']:
        tokenURL = url
        break
    assert tokenURL is not None
    assert tokenURL.scheme == 'http'
    assert tokenURL.netloc == '127.0.0.1:9999'
    assert tokenURL.path == '/token'
    # Endpoint URL plus the identity/scope query parameters.
    url = ParseResult(tokenURL.scheme, tokenURL.netloc, tokenURL.path,
                      tokenURL.params,
                      urllib.urlencode({'me': me, 'scope': 'post'}),
                      tokenURL.fragment).geturl()
    r = requests.get(url)
    # token GET with no access_token present
    assert r.status_code == 400
def _connect_to_host(self):
    """Open a socket to the request target.  For CONNECT tunnels the
    host:port comes straight from the request path; otherwise the absolute
    http:// URI is parsed and the path rewritten to be origin-relative."""
    # Get host and port
    if self.is_connect:
        self.hostname, self.port = self.path.split(':')
    else:
        u = urlparse(self.path)
        if u.scheme != 'http':
            raise UnsupportedSchemeException('Unknown scheme %s' % repr(u.scheme))
        self.hostname = u.hostname
        self.port = u.port or 80
        # Strip scheme and netloc so the origin server sees a relative path.
        self.path = urlunparse(
            ParseResult(scheme='', netloc='', params=u.params, path=u.path or '/', query=u.query, fragment=u.fragment))
    # Connect to the destination
    self._proxy_sock = socket()
    # NOTE(review): 48000-second timeout looks unusually large — confirm.
    self._proxy_sock.settimeout(48000)
    self._proxy_sock.connect((self.hostname, int(self.port)))
    # Wrap the socket with SSL when tunnelling
    if self.is_connect:
        self._proxy_sock = ssl.wrap_socket(self._proxy_sock)
def _get_access_token(self, url):
    """Returns the current access token. Generates a new access token if no
    current access token can be found"""
    if self.access_token:
        # Cached token — reuse.
        return self.access_token
    # OAuth2 password-grant request body.
    data = "client_id=%s&client_secret=%s&grant_type=password&username=%s&password=%s&scope=write" %\
        (self.client_id, self.client_secret, self.username, self.password)
    parsed = urlparse(url)
    # Reuse scheme+host from *url*, but always hit /oauth2/access_token.
    path = urlunparse(
        ParseResult(parsed.scheme, parsed.netloc, "/oauth2/access_token", None, None, None))
    auth_resp = urlopen(Request(path, data), timeout=10)
    if auth_resp.getcode() != 200:
        self.logger.error("Error with client credentials")
        # self.access_token is still unset here, so this returns a falsy
        # value on failure.
        return self.access_token
    auth_resp_data = json.loads(auth_resp.read())
    if "access_token" in auth_resp_data:
        self.access_token = auth_resp_data["access_token"]
    else:
        self.logger.error("Error with client credentials")
    return self.access_token
def crawl(self, url, **kwargs):
    """Fetch *url* through the cookied session.

    Retries once after rebuilding cookies on HTTP 409; raises on HTTP 405
    (captcha) and any other non-200 status.
    :return: (response, parsed lxml document) on success.
    """
    parsed = urlparse(url)
    logging(u'Preparing crawler for "{}"'.format(parsed.geturl()))
    params = {
        u'httpReferrer': u'?'.join([parsed.path, parsed.query]),
        u'uid': self.session.cookies[u'D_ZUID'],
    }
    # The anti-bot cookie-identification endpoint on the same host.
    url_4 = ParseResult(parsed.scheme, parsed.netloc, u'distil_identify_cookie.html', u'', u'', u'').geturl()
    try:
        res = self._get(url_4, params=params, referer=self.lastReferer, **kwargs)
        if res.status_code == 200:
            logging(u'"{}" - Crawled'.format(parsed.geturl()))
            return res, html.fromstring(res.content)
        elif res.status_code == 405:
            logging(u'HTTP 405 detected, looks like Captcha is stopping us! Change proxy and try again . . .')
            res.raise_for_status()
        elif res.status_code == 409:
            # Cookies went stale: rebuild the session and retry once.
            logging(u'HTTP 409 detected, try rebuilding cookies . . .')
            self.session.headers = cfg.base_headers
            self._buildCookiedSession(cfg.manta_base_url)
            return self.crawl(url)
        else:
            logging(u'HTTP {} detected, not sure how to handle this . . .'.format(res.status_code))
            res.raise_for_status()
    except Exception, e:
        # Log and propagate — callers decide how to recover.
        logging(u'"{}" - Error: {}'.format(parsed.geturl(), unicode(e)))
        raise
def _connect_to_host(self):
    """Connect to the request target through the local HTTP proxy at
    127.0.0.1:8080 (via a SOCKS-capable socket)."""
    # Get hostname and port to connect to
    if self.is_connect:
        self.hostname, self.port = self.path.split(':')
    else:
        u = urlparse(self.path)
        if u.scheme != 'http':
            raise UnsupportedSchemeException('Unknown scheme %s' % repr(u.scheme))
        self.hostname = u.hostname
        self.port = u.port or 80
        # Strip scheme and netloc so the origin server sees a relative path.
        self.path = urlunparse(
            ParseResult(scheme='', netloc='', params=u.params, path=u.path or '/', query=u.query, fragment=u.fragment))
    # Connect to destination
    self._proxy_sock = socks.socksocket()  # socket()
    self._proxy_sock.settimeout(10)
    self._proxy_sock.setproxy(socks.PROXY_TYPE_HTTP, "127.0.0.1", 8080)
    self._proxy_sock.connect((self.hostname, int(self.port)))
    # Wrap socket if SSL is required
    if self.is_connect:
        self._proxy_sock = wrap_socket(self._proxy_sock)
def send_request_indication(self, request_indication):
    """Forward a request indication to the remote SCL addressed by its
    absolute path, returning a Promise that is fulfilled with the response
    or rejected with an ErrorResponseConfirmation."""
    # TODO: rewrite HTTP request handling totally
    with Promise() as p:
        fullpath = request_indication.path
        parsed = urlparse(fullpath)
        # Copy so the caller's request keeps its absolute path.
        request_indication = copy(request_indication)
        # Strip scheme+netloc; keep path/params/query/fragment.
        request_indication.path = urlunparse(
            ParseResult("", "", *parsed[2:]))
        try:
            client = XIXClient(
                parsed.scheme + "://" + parsed.netloc,
                scl_base=None,
                etsi_v1_compatibility=self.etsi_compatibility_needed,
                etsi_v2=self.etsi_version_v2)
            response = client.send_request_indication(request_indication)
        except OpenMTCNetworkError as e:
            response = ErrorResponseConfirmation(STATUS_BAD_GATEWAY,
                                                 request_indication.method,
                                                 str(e))
            p.reject(response)
        except SCLError as e:
            response = ErrorResponseConfirmation(e.statusCode,
                                                 request_indication.method,
                                                 str(e))
            p.reject(response)
        else:
            #response = self._convert_response(fullpath, request_indication, response)
            # Restore the absolute path on retrieved resources.
            if request_indication.method == "retrieve" and hasattr(
                    response.resource, "path"):
                response.resource.path = fullpath
            p.fulfill(response)
    return p
def handle_authcode(request, client, redirection_uri, state=None):
    """OAuth2 authorization-code grant (variant that does not pass the
    redirection URI to the code object): mint a code and redirect back to
    the client with ``code`` (and optional ``state``) in the query."""
    parts = urlparse(redirection_uri.uri)
    qparams = dict(parse_qsl(parts.query))
    user_id = authenticated_userid(request)
    auth_code = Oauth2Code(client, user_id)
    db.add(auth_code)
    # flush so auth_code.authcode is populated by the DB layer.
    db.flush()
    qparams['code'] = auth_code.authcode
    if state:
        qparams['state'] = state
    # Rebuild the URI with the augmented query; the fragment is dropped.
    parts = ParseResult(
        parts.scheme, parts.netloc, parts.path, parts.params,
        urlencode(qparams), '')
    return HTTPFound(location=parts.geturl())
def _get_worker_uris(workers, filename, scheme='file', include_path=True, ports=None): return [ParseResult(scheme=scheme, netloc=worker['hostname'] + (':' + str(port) if port else ''), path=os.path.join(worker['path'] if include_path else '', filename or ''), params='', query='', fragment='').geturl() for worker, port in izip_longest(workers, ports or [])]
def _connect_to_host(self):
    """Determine the target host/port and open a chained proxy pipe to it.
    CONNECT tunnels carry host:port in the request path; plain requests
    carry an absolute http:// URI."""
    if self.connect_through_ssl:
        self.hostname, self.port = self.path.split(':')
    else:
        u = urlparse(self.path)
        if u.scheme != 'http':
            # NOTE(review): only logs — does not abort on unknown schemes.
            print 'ERROR Unknown request scheme: %s' % repr(u.scheme)
        self.hostname = u.hostname
        self.port = u.port or 80
        # NOTE(review): unlike the sibling proxy implementations, this one
        # rebuilds the path as an absolute URI (scheme and netloc kept) —
        # confirm that is intended.
        self.path = urlunparse(
            ParseResult(
                scheme='http',
                netloc='%s' % u.hostname,
                params=u.params,
                path=u.path or '/',
                query=u.query,
                fragment=u.fragment
            )
        )
    # Create a pipe to the remote server
    self._pipe_socket = ProxySocket(
        self._proxy_fetcher,
        chainlength=self.CHAIN,
        use_ssl=self.connect_through_ssl,
        DEBUG=self.DEBUG
    )
    self._pipe_socket.connect(
        (self.hostname, int(self.port))
    )
    # Wrap socket if SSL is required
    if self.connect_through_ssl:
        self._pipe_socket = wrap_socket(self._pipe_socket)
def get_callback_url(self):
    """Return self.callback with the transaction details appended to its
    query string (existing query parameters are preserved first)."""
    from urlparse import urlparse, ParseResult
    import urllib
    callback_url_parse = urlparse(self.callback)
    # Transaction attributes forwarded to the callback endpoint.
    query_args = {
        'fwd_fee': self.miners_fee,
        'value': self.value,
        'input_address': self.input_address,
        'confirmations': self.confirmations,
        'transaction_hash': self.transaction_hash,
        'input_transaction_hash': self.input_transaction_hash,
        'destination_address': self.destination_address,
        'payee_addresses': self.payee_addresses
    }
    if not callback_url_parse.query:
        url_query = urllib.urlencode(query_args)
    else:
        # Keep the callback's own parameters ahead of ours.
        url_query = callback_url_parse.query + '&' + urllib.urlencode(query_args)
    callback_url = ParseResult(scheme=callback_url_parse.scheme,
                               netloc=callback_url_parse.netloc,
                               path=callback_url_parse.path,
                               params=callback_url_parse.params,
                               query=url_query,
                               fragment=callback_url_parse.fragment).geturl()
    return callback_url
def _connect_to_host(self): ''' Establishes an ssl tunnel if it is a https request ''' # Get hostname and port to connect to if self.is_connect: self.hostname, self.port = self.path.split(':') else: u = urlparse(self.path) if u.scheme != 'http': raise UnsupportedSchemeException('Unknown scheme %s' % repr(u.scheme)) self.hostname = u.hostname self.port = u.port or 80 self.path = urlunparse( ParseResult(scheme='', netloc='', params=u.params, path=u.path or '/', query=u.query, fragment=u.fragment)) # Connect to destination self._proxy_sock = socket() self._proxy_sock.settimeout(10) self._proxy_sock.connect((self.hostname, int(self.port))) # Wrap socket if SSL is required if self.is_connect: self._proxy_sock = wrap_socket(self._proxy_sock)
def urlparse(urlstring, scheme='', allow_fragments=True, *args, **kwargs): """A wrapper for :py:func:`urlparse.urlparse` with the following differences: * Handles buckets in S3 URIs correctly. (:py:func:`~urlparse.urlparse` does this correctly sometime after 2.6.1; this is just a patch for older Python versions.) * Splits the fragment correctly in all URIs, not just Web-related ones. This behavior was fixed in the Python 2.7.4 standard library but we have to back-port it for previous versions. """ # we're probably going to mess with at least one of these values and # re-pack the whole thing before we return it. # NB: urlparse_buggy()'s second argument changes names from # 'default_scheme' to 'scheme' in Python 2.6, so urlparse_buggy() should # be called with positional arguments. (scheme, netloc, path, params, query, fragment) = (urlparse_buggy(urlstring, scheme, allow_fragments, *args, **kwargs)) if netloc == '' and path.startswith('//'): m = NETLOC_RE.match(path) netloc = m.group(1) path = m.group(2) if allow_fragments and '#' in path and not fragment: path, fragment = path.split('#', 1) return ParseResult(scheme, netloc, path, params, query, fragment)
def __init__(self, host='127.0.0.1', port=9333, prefetch_volumeIds=False):
    """
    Arguments:
    - `host`: master server host (default 127.0.0.1)
    - `port`: master server port (default 9333)
    - `prefetch_volumeIds`: if True, prefetch volumeIds '1'..'10' into
      the cache: self.volumes_cache
    """
    self.host = host
    self.port = port
    # Base URL of the master, e.g. http://127.0.0.1:9333
    self.url_base_parts = ParseResult(scheme='http', netloc='%s:%d' % (host, port), path='', params='', query='', fragment='')
    self.url_base = urlunparse(self.url_base_parts)
    self.url_assign = urljoin(self.url_base, '/dir/assign')
    self.url_lookup = urljoin(self.url_base, '/dir/lookup')
    self.url_vacuum = urljoin(self.url_base, '/vol/vacuum')
    self.url_status = urljoin(self.url_base, '/dir/status')
    # volumes usually do not move, so we cache it here for 1 minute.
    # NOTE(review): no expiry is visible here — presumably lookup()
    # enforces the 1-minute TTL; confirm.
    self.volumes_cache = {}
    if prefetch_volumeIds:
        LOGGER.info("prefetching volumeIds(['1' : '10'] into cache")
        for i in range(10):
            self.lookup(str(i + 1))
def url(self, path=''):
    """Return self._url with the given path segments appended.

    *path* may be separated by '/' or whitespace; empty segments in it
    are dropped before being joined onto the existing URL path.
    """
    parts = urlparse(self._url)
    segments = parts.path.split('/')
    segments += path.replace('/', ' ').strip().split()
    fields = parts._asdict()
    fields['path'] = '/'.join(segments)
    return ParseResult(**fields).geturl()
def generic(self, method, path, data='', content_type='application/octet-stream', secure=False, **extra):
    """Issue a request like Django's test client, but patch the client's
    urlparse so the request URL is resolved from this client's environ
    (scheme, server name, path and query string) instead of the raw path."""
    environ = self._base_environ(PATH_INFO=path, **extra)
    from urlparse import ParseResult
    with patch('django.test.client.urlparse') as mock:
        mock.return_value = ParseResult(
            scheme=environ['wsgi.url_scheme'],
            netloc=environ['SERVER_NAME'],
            path=environ['PATH_INFO'],
            params='',
            query='QUERY_STRING' in environ and environ['QUERY_STRING'] or '',
            fragment='')
        return super(SymClient, self).generic(method, path, data,
                                              content_type=content_type,
                                              secure=secure, **extra)
def test_get_out_going_edges_works_fine(self):
    """Absolute, relative and root-relative anchors should all resolve to
    absolute URLs on the start domain, with query/fragment stripped."""
    valid_links = [
        "http://nonexistingurl.com/about.html",
        "http://nonexistingurl.com/help.html",
        "./products.html",
        "/samples.html",
        "http://nonexistingurl.com/path?a=b#dummy"
    ]
    # Build a minimal HTML document containing one anchor per link.
    anchors = ""
    for link in valid_links:
        anchors += "<a href=\"{0}\">link</a>\n".format(link)
    html = "<html><body>{0}</body></html>".format(anchors)
    expected = [
        "http://nonexistingurl.com/about.html",
        "http://nonexistingurl.com/help.html",
        "http://nonexistingurl.com/products.html",
        "http://nonexistingurl.com/samples.html",
        "http://nonexistingurl.com/path",
    ]
    subject = SiteMap()
    subject.set_start_page("nonexistingurl.com")
    url = ParseResult(scheme='http', netloc='nonexistingurl.com', path='', params='', query='', fragment='')
    actual = subject.get_out_going_edges(url, html)
    for e in expected:
        self.assertTrue(e in actual)
def sign(self, key, expiration=90, **extensions):
    """
    Sign the URL using the specified private RSA key.
    Has the format of: <url>?policy=<policy>;signature=<signature>.
    The *policy* is:
      {resource: <resource>, expiration: <seconds>, extensions: <ext>}
    The *resource* is the path?query in the original URL.
    The *expiration* is: seconds since epoch.
    The *signature* is RSA signature of the SHA256 digest of the
    json/base64 encoded policy.
    :param key: A private RSA key.
    :type key: RSA.RSA
    :param expiration: The signature expiration in seconds.
    :type expiration: int
    :param extensions: Optional policy extensions.
    :type extensions: dict
    :return: The signed URL.
    :rtype: SignedURL
    """
    # Convert the relative expiry into an absolute epoch timestamp.
    expiration = int(time() + expiration)
    policy = Policy(self.resource, expiration)
    policy.extensions = extensions
    policy, signature = policy.sign(key)
    # Merge the encoded policy and signature into the existing query.
    query = Query.decode(self.query)
    query[URL.SIGNATURE] = signature
    query[URL.POLICY] = policy
    signed = ParseResult(scheme=self.scheme,
                         netloc=self.netloc,
                         path=self.path,
                         params=self.params,
                         query=Query.encode(query),
                         fragment='')
    return SignedURL(urlunparse(signed))
def handle_implicit(request, client, redirection_uri, state=None):
    """OAuth2 implicit grant: mint an access token and redirect back to
    the client with the token parameters carried in the URI *fragment*
    (the query is cleared)."""
    parts = urlparse(redirection_uri.uri)
    # NOTE(review): seeding with state=None means an explicit 'state=None'
    # pair is urlencoded when no state argument is supplied — confirm that
    # is intended.
    fparams = dict(state=None)
    user_id = authenticated_userid(request)
    token = Oauth2Token(client, user_id)
    db.add(token)
    # flush so token fields are populated by the DB layer.
    db.flush()
    fparams["access_token"] = token.access_token
    fparams["token_type"] = "bearer"
    fparams["expires_in"] = token.expires_in
    if state:
        fparams["state"] = state
    # Token data goes into the fragment, not the query.
    parts = ParseResult(parts.scheme, parts.netloc, parts.path, parts.params,
                        "", urlencode(fparams))
    return HTTPFound(location=parts.geturl())
def rel_to_abs(start_path, relative_url):
    """converts a relative url at a specified (absolute) location

    params:
        start_path - the absolute path from which the relative url is
            being accessed
        relative_url - the relative url on the page
    returns: the absolute URL string.

    Note: the relative url is simply appended to the start path's segments
    (no ``..`` resolution), and the start URL's params/query/fragment are
    carried over verbatim; use urljoin if RFC 3986 resolution is needed.
    """
    parsed_start_url = urlparse(start_path)
    # filter(None, ...) drops empty segments; materialized as a list so the
    # subsequent append also works on Python 3, where the original
    # `filter(...) += [...]` raised TypeError (filter returns an iterator).
    path_items = list(filter(None, parsed_start_url.path.split('/')))
    path_items.append(relative_url)
    new_path = '/'.join(path_items)
    parsed_abs_url = ParseResult(
        scheme=parsed_start_url.scheme,
        netloc=parsed_start_url.netloc,
        path=new_path,
        params=parsed_start_url.params,
        query=parsed_start_url.query,
        fragment=parsed_start_url.fragment)
    return parsed_abs_url.geturl()
def replace_netloc(self, netloc):
    u"""Replace the network location of the media asset URI, keeping every
    other component intact.

    For example, replacing the netloc of 'http://127.0.0.1/media' with
    '129.194.185.47:5003' yields 'http://129.194.185.47:5003/media'.

    :param netloc: the new 'host[:port]' to substitute into self.url.
    """
    parts = urlparse(self.url)
    self.url = ParseResult(parts.scheme, netloc, parts.path,
                           parts.params, parts.query,
                           parts.fragment).geturl()
def add_params(self, params_dict): new_query = urlencode(dict(params_dict, **self.parsed_query())) # create a new parse result with the altered query # TODO there has to be a better way self.parsed_uri = ParseResult(query=new_query, scheme=self.parsed_uri.scheme, netloc=self.parsed_uri.netloc, path=self.parsed_uri.path, params=self.parsed_uri.params, fragment=self.parsed_uri.fragment)
def parts_to_url(scheme, netloc, path, params, query, fragment):
    """Assemble the six URL components back into a URL string."""
    return ParseResult(scheme, netloc, path, params, query, fragment).geturl()
class RedirectURI(Validatable):
    """
    wrapper object for the redirect_uri parameter as part of the
    authorization request
    """

    def __init__(self, uri, settings):
        # Keep both the raw string and the parsed form around.
        self.raw_uri = uri
        self.parsed_uri = urlparse(uri)
        self.error_message = None
        self.settings = settings
        self.error_responses = settings['error_responses']['redirect_uri']

    # used in super class is_valid
    def validate(self):
        self.error_message = self.error_responses.get(self.determine_errors(), None)

    def determine_errors(self):
        # Returns an error key ('invalid' / 'not_absolute') or None
        # when the URI passes both checks.
        if not self.is_permitted_site():
            return 'invalid'
        if not self.is_absolute():
            return 'not_absolute'

    def add_params(self, params_dict):
        # Merge new params with the existing query; on key collisions the
        # existing query values win (dict(new, **existing)).
        new_query = urlencode(dict(params_dict, **self.parsed_query()))
        # create a new parse result with the altered query
        # TODO there has to be a better way
        self.parsed_uri = ParseResult(query=new_query,
                                      scheme=self.parsed_uri.scheme,
                                      netloc=self.parsed_uri.netloc,
                                      path=self.parsed_uri.path,
                                      params=self.parsed_uri.params,
                                      fragment=self.parsed_uri.fragment)

    # NOTE this will truncate query params that are used more than once
    # generally not a good idea
    def parsed_query(self):
        new_query = parse_qs(self.parsed_uri.query)
        for key, value in new_query.iteritems():
            # NOTE(review): `join(value, ",")` reads like Python 2's
            # string.join (list joined with ','); if `join` is actually
            # os.path.join this line is a bug — confirm which join the
            # file imports.
            new_query[key] = value if len(new_query[key]) < 1 else join(value, ",")
        return new_query

    def is_absolute(self):
        return self.parsed_uri.scheme != "" and self.parsed_uri.netloc != ""

    def is_permitted_site(self):
        # the oauth 2 spec recommends validating the redirect uri against
        # a pre defined uri to prevent an open redirect; for the sake of
        # simplicity we've chosen to forgo that validation
        #
        #
        # return self.raw_uri.startswith(self.settings['redirect_site'])
        return True

    def get_url(self):
        return self.parsed_uri.geturl()