def split(url):
    """
    Splits the URL between root (scheme + FQDN) and the rest of the URL.

    The second element always starts with ``/``; when the URL carries nothing
    after the root, the tail is just ``'/'``.

    Example::

        >>> split('http://example.com/foo')
        ('http://example.com', '/foo')

        >>> split('/foo/bar')
        ('', '/foo/bar')

        >>> split('https://*****:*****@www.test.com/foo/bar')
        ('https://*****:*****@www.test.com', '/foo/bar')

        >>> split('http://localhost/?foo=bar')
        ('http://localhost', '/?foo=bar')

        >>> split('http://localhost/foo?bar=baz')
        ('http://localhost', '/foo?bar=baz')

        >>> split('http://localhost?foo=bar')
        ('http://localhost', '/?foo=bar')

    :param url: URL to strip the root from
    :returns: ``(root, tail)`` tuple; the tail is the path with root stripped
    """
    parsed = urlparse.urlparse(url)
    base = urlparse.urlunparse(mask(parsed, ROOT_MASK))
    stripped = urlparse.urlunparse(mask(parsed, TAIL_MASK))
    # ``startswith`` instead of ``stripped[0]``: the tail can be an empty
    # string (URL without path/query/fragment), and indexing it would raise
    # IndexError.
    if not stripped.startswith('/'):
        stripped = '/' + stripped
    return base, stripped
def load_posts(self, *, web_driver: WebDriver = None, **params) -> List[Post]:
    """
    Load the wall posts of the group and wrap them into ``Post`` objects.

    Without ``web_driver`` the posts come from ``self.get_all_objects``.
    With a ``web_driver`` the method logs in on vk.com through the browser
    and pages through the ``wall.get`` form on vk.com/dev, 100 posts at a
    time.

    :param web_driver: browser driver used for the vk.com/dev path; ``None``
        selects the plain API path
    :param params: ``wall.get`` parameters; ``owner_id`` defaults to
        ``-self.group_id``
    :returns: list of ``Post`` built with ``Post.from_raw``
    """
    params.setdefault('owner_id', -self.group_id)
    if web_driver is None:
        raw_posts = self.get_all_objects('wall.get', **params)
        return [Post.from_raw(raw_post) for raw_post in raw_posts]

    # Log in through the regular vk.com login form.
    open_url('https://vk.com', web_driver)
    login = web_driver.find_element_by_xpath('//*[@id="index_email"]')
    login.clear()
    login.send_keys(self.user_login)
    password = web_driver.find_element_by_xpath('//*[@id="index_pass"]')
    password.clear()
    password.send_keys(self.user_password)
    web_driver.find_element_by_xpath('//*[@id="index_login_button"]').click()

    url_parts = list(urlparse('https://vk.com/dev/wall.get'))
    count = 100
    query = {
        'params[owner_id]': params['owner_id'],
        'params[count]': count,
        'params[offset]': params.get('offset', 0),
        'params[filter]': params.get('filter', 'owner'),
        'params[fields]': params.get('fields', ''),
        'params[v]': self.api_version,
    }

    def fetch_page():
        # Rebuild the URL from the current state of ``query`` and fetch it.
        url_parts[4] = urlencode(query)
        return parse_from_vk_dev(urlunparse(url_parts), web_driver)['response']

    response = fetch_page()
    total_count = response['count']
    raw_posts = response['items']
    while len(raw_posts) < total_count:
        query['params[offset]'] += count
        items = fetch_page()['items']
        if not items:
            # Nothing left to fetch (e.g. paging started at a non-zero
            # offset, so ``total_count`` can never be reached). Without this
            # guard the loop would never terminate.
            break
        raw_posts += items
    return [Post.from_raw(raw_post) for raw_post in raw_posts]
def open_with_auth2(url):
    """
    Open a urllib2 request, handling HTTP authentication

    In this version, user-agent is ignored

    Credentials embedded in an ``http``/``https`` URL (``user:pass@host``)
    are removed from the request URL and sent as a ``Basic`` Authorization
    header instead.

    :param url: URL to open, optionally carrying credentials in its netloc
    :returns: the object returned by ``urlopen``; when credentials were used
        and the final URL is on the same scheme and host, its ``url``
        attribute gets the credentials put back
    """
    import base64

    scheme, netloc, path, params, query, frag = urlparse(url)

    if scheme in ('http', 'https'):
        auth, host = splituser(netloc)
    else:
        auth, host = None, None

    if auth:
        # ``str.encode('base64')`` only exists on Python 2 and inserts line
        # breaks into long values; b64encode yields a single clean token.
        token = base64.b64encode(unquote(auth).encode('utf-8')).decode('ascii')
        auth = "Basic " + token
        new_url = urlunparse((scheme, host, path, params, query, frag))
        request = Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = Request(url)

    # request.add_header('User-Agent', user_agent)
    fp = urlopen(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse(fp.url)
        if s2 == scheme and h2 == host:
            fp.url = urlunparse((s2, netloc, path2, param2, query2, frag2))

    return fp
def __getattr__(self, name):
    """
    Resolve the computed, read-only attributes of the request.

    Supported names: ``urlWithoutVariables``, ``pathWithVariables``,
    ``completeUrl``, ``finalUrl``, ``urlWithoutPath``, ``path`` and
    ``postdata``.

    :param name: attribute being looked up
    :returns: the computed value for ``name``
    :raises AttributeError: for any other name
    """
    if name == "path":
        return self.__path

    if name == "urlWithoutPath":
        return "%s://%s" % (self.schema, self._headers["Host"])

    if name == "urlWithoutVariables":
        return urlunparse((self.schema, self.__host, self.__path, '', '', ''))

    if name == "pathWithVariables":
        get_query = self.__variablesGET.urlEncoded()
        return urlunparse(('', '', self.__path, '', get_query, ''))

    if name == "completeUrl":
        get_query = self.__variablesGET.urlEncoded()
        return urlunparse((self.schema, self.__host, self.__path,
                           self.__params, get_query, ''))

    if name == "finalUrl":
        # Fall back to the complete URL until a final URL has been recorded.
        return self.__finalurl if self.__finalurl else self.completeUrl

    if name == "postdata":
        # A raw body, when present, wins over the parsed POST variables.
        if self._non_parsed_post is not None:
            return self._non_parsed_post
        content_type = self.ContentType
        if content_type == "multipart/form-data":
            return self.__variablesPOST.multipartEncoded()
        if content_type == 'application/json':
            return self.__variablesPOST.json_encoded()
        # "application/x-www-form-urlencoded" and every other content type.
        return self.__variablesPOST.urlEncoded()

    raise AttributeError
def request(url, method_name, debug=False, session_kwargs=None, **kwargs):
    """
    Issue a single request through a freshly created ``HttpSession``.

    The URL is split in two: scheme + netloc become the session host, the
    remaining components become the path handed to the session method.

    :param url: full URL to request
    :param method_name: name of the ``HttpSession`` method to call
    :param debug: value stored on ``session.debug``
    :param session_kwargs: optional keyword arguments for ``HttpSession``
    :param kwargs: forwarded to the session method
    :returns: whatever the session method returns
    """
    scheme, netloc, *tail = urlparse(url)
    session = HttpSession(urlunparse((scheme, netloc, '', '', '', '')),
                          **(session_kwargs or {}))
    session.debug = debug
    relative = urlunparse(('', '', *tail))
    send = getattr(session, method_name)
    return send(relative, **kwargs)
def _request(self, path, method='GET', **params):
    """
    Make an API request

    The API token always travels in the query string. Methods listed in
    ``APIClient.ANY_GET_REQUEST`` are sent as ``GET`` with ``params`` merged
    into the query string; everything else is sent as ``POST`` with
    ``params`` url-encoded in the body. Methods missing from
    ``APIClient.HTTP_REQUEST`` are passed along in a ``_method`` query
    parameter.

    :param path: path appended to ``self.baseurl``
    :param method: HTTP method name (case and surrounding blanks ignored)
    :param params: request parameters
    :returns: ``(resp, decoded_json_content)``
    :raises APIError: when the response status is not 200
    """
    urlparts = urlparse("{0}/{1}".format(self.baseurl, path))
    method = method.upper().strip()

    qstr = parse_qs(urlparts.query)
    qstr['token'] = self.apikey
    if method in APIClient.HTTP_REQUEST:
        qstr.pop('_method', None)
    else:
        qstr['_method'] = method

    as_get = method in APIClient.ANY_GET_REQUEST
    if as_get:
        # For a get request the params belong in the query string.
        qstr.update(params)

    # Reconstruct the url with the new query string and an empty fragment.
    url = urlunparse((urlparts.scheme, urlparts.netloc, urlparts.path,
                      urlparts.params, APIClient.urlencode(qstr), ""))

    if as_get:
        log.debug("Making GET request to {0}".format(url))
        resp, content = self.http.request(
            url, "GET", headers=self.USER_AGENT_HEADER)
    else:
        log.debug("Making POST request to {0}".format(url))
        resp, content = self.http.request(
            url, "POST", urlencode(params), headers=self.USER_AGENT_HEADER)

    if int(resp['status']) != 200:
        raise APIError("An unknown error occurred")

    return resp, json.loads(content)
def getPath(self, url):
    """
    Resolve ``url`` against ``self.url`` and split off its fragment.

    Relative, non-empty target paths are anchored at the current working
    directory; a missing scheme is treated as ``file``.

    :param url: URL (or path) to resolve
    :returns: ``(resolved_url, fragment)``; ``resolved_url`` is ``None`` when
        the target is the same document as ``self.url``
    """
    target = urlparse(url)
    path = target.path
    if path and not path.startswith("/"):
        path = os.getcwd() + "/" + path

    here = self.url
    origin = urlunparse((here.scheme or "file", here.netloc, here.path,
                         "", "", ""))
    destination = urlunparse((target.scheme or "file", target.netloc, path,
                              "", "", ""))

    resolved = urljoin(origin, destination)
    if resolved == origin:
        resolved = None
    return (resolved, target.fragment)
def open_help(self, page, anchor):
    """
    Open a help page in a web browser.

    The page is looked up inside ``self.folder``; when it cannot be found,
    ``error.html`` from the same folder is opened instead and the anchor is
    dropped. The anchor is only attached to the url when a handle for one of
    the ``PREFERRED_BROWSERS`` could be obtained; otherwise the url is opened
    without it through the default ``open``.

    :param page: The page to open, relative to ``self.folder``.
    :param anchor: The anchor to attach to the url. Ignored if none of the
        preferred browsers is available or if the error page is shown.
    :return: The opened url as string
    """
    requested = realpath(join(self.folder, str(page)))
    if _does_file_exist_(requested):
        file_name = requested
    else:
        file_name = join(self.folder, "error.html")
        anchor = ''  # the error page does not need anchors

    # Take the first preferred browser we can get a handle for.
    web = None
    for browser in PREFERRED_BROWSERS:
        try:
            web = get(browser)
        except Error:
            print("Was not able to open browser <" + str(browser) +
                  ">. Trying another...")
        else:
            break

    # Without a preferred browser the anchor is left out of the url.
    fragment = str(anchor) if web else ''
    url = urlunparse(['file', '', realpath(file_name), '', '', fragment])
    if web:
        web.open(url)
    else:
        open(url)
    return str(url)
def _generate_url(self, user_ids):
    """
    Build the request URL for the given users.

    Starts from ``self.VK_PHOTOS_API_URL``, keeps any query parameters it
    already has and adds ``user_ids``, ``v`` and ``access_token``.

    The URL is deliberately not printed or logged here: it contains the
    access token.

    :param user_ids: iterable of user ids (converted with ``str``)
    :returns: the complete URL as a string
    """
    params = {
        'user_ids': ','.join(str(user_id) for user_id in user_ids),
        'v': '5.45',
        'access_token': VK_ACCESS_TOKEN,
    }

    url_parts = list(urlparser.urlparse(self.VK_PHOTOS_API_URL))
    query = dict(urlparser.parse_qsl(url_parts[4]))
    query.update(params)
    url_parts[4] = urlencode(query)

    return urlparser.urlunparse(url_parts)
def output(self, key, obj):
    """
    Render the URL of ``self.endpoint`` for ``obj``.

    Without a ``data_func`` the parent implementation is used. Otherwise the
    URL is built from the values ``data_func(obj)`` returns and reduced to
    its path (relative form) or to scheme + netloc + path (absolute form,
    with ``self.scheme`` overriding the generated scheme when it is set).

    :param key: passed through to the parent implementation
    :param obj: object the URL is generated for
    :returns: URL string without params, query and fragment
    """
    if not self.data_func:
        return super(AbsoluteUrl, self).output(key, obj)

    values = self.data_func(obj)
    o = urlparse(url_for(self.endpoint, _external=self.absolute, **values))

    if not self.absolute:
        return urlunparse(("", "", o.path, "", "", ""))

    scheme = o.scheme if self.scheme is None else self.scheme
    return urlunparse((scheme, o.netloc, o.path, "", "", ""))
def campus_search():
    """
    List campuses as JSON, with ``previous``/``next`` paging links.

    Query-string arguments read from the request:

    * ``q`` -- search text; when it is present no campuses are returned
      (only the unfiltered listing queries the database),
    * ``skip`` -- number of campuses to skip (default 0),
    * ``limit`` -- page size (default 20).

    :returns: JSON response ``{'data': [...], 'paging': {...}}``
    """
    search = request.args.get('q')
    skip = int(request.args.get('skip', 0))
    limit = int(request.args.get('limit', 20))

    if not search:
        campuses = Campus.objects().skip(skip).limit(limit)
    else:
        campuses = []

    data = []
    for campus in campuses:
        campus_data = {
            'id': str(campus.id),
            'university': {
                'name': campus.univ_name,
                'type': campus.univ_type,
            },
            'campus': {
                'name': campus.campus_name,
            },
        }
        if campus.domain:
            campus_data['domain'] = campus.domain
        data.append(campus_data)

    result = {'data': data, 'paging': {}}

    if skip > 0:
        # Never page back past the first element.
        result['paging']['previous'] = _campus_paging_url(
            request.url, max(skip - limit, 0), limit)

    if len(data) >= limit:
        result['paging']['next'] = _campus_paging_url(
            request.url, skip + len(data), limit)

    return jsonify(result)


def _campus_paging_url(url, skip, limit):
    """Return ``url`` with its ``skip``/``limit`` query parameters replaced."""
    parts = list(parse.urlparse(url))
    args = parse.parse_qs(parts[4])
    args['skip'] = skip
    args['limit'] = limit
    # parse_qs maps every key to a *list* of values; without ``doseq`` those
    # lists would be encoded literally (``q=%5B%27foo%27%5D``) and the other
    # query parameters of the request would be corrupted in the paging links.
    parts[4] = parse.urlencode(args, doseq=True)
    return parse.urlunparse(parts)
def url(self, val):
    """
    Parse url and set the url info into Page's object instances

    Sets ``_scheme``, ``_host``, ``_root_url`` and ``_url``. An empty path is
    normalized to ``/``.

    :type val : str
    """
    parsed = urlparse(self.to_absolute_url(val))

    self._scheme = parsed.scheme
    self._host = parsed.hostname
    self._root_url = urlunparse((parsed.scheme, parsed.netloc, "/",
                                 '', '', ''))

    # NOTE(review): ``_root_url`` keeps the full netloc while ``_url`` is
    # rebuilt from ``self.host`` -- presumably the parsed hostname, i.e.
    # without port or credentials; confirm this difference is intended.
    self._url = urlunparse((
        self._scheme,
        self.host,
        parsed.path or "/",
        parsed.params,
        parsed.query,
        parsed.fragment,
    ))
def __repr__(self): if self.version: version = str(self.version) else: version = 'None' if self.url: url = str(urlunparse(self.url)) elif self.unresolved_url: url = str(urlunparse(self.unresolved_url)) else: url = 'Uninitialized' return self.__class__.__name__ + "(" + version + ', ' + url + ")"
def clean_urls(article_content, article_link):
    """
    Rewrite image and iframe URLs inside an article's HTML.

    * ``<img>`` sources lacking a scheme or a netloc get the missing part
      from ``article_link``.
    * When served securely, ``srcset`` attributes are removed from images
      and ``http`` iframes whose netloc contains one of
      ``HTTPS_IFRAME_DOMAINS`` are switched to ``https``.

    :param article_content: HTML of the article
    :param article_link: URL of the article, used to complete image URLs
    :returns: the modified HTML as a string
    """
    article_url = urlparse(article_link)
    soup = BeautifulSoup(article_content, "html.parser")

    for img in soup.find_all("img"):
        if "src" not in img.attrs:
            continue
        if is_secure_served() and "srcset" in img.attrs:
            # removing active content when serving over https
            del img.attrs["srcset"]

        img_src = urlparse(img.attrs["src"])
        if img_src.scheme and img_src.netloc:
            continue  # already a complete URL
        # either scheme or netloc are missing from the src of the img
        completed = img_src._replace(
            scheme=img_src.scheme or article_url.scheme,
            netloc=img_src.netloc or article_url.netloc,
        )
        img.attrs["src"] = urlunparse(completed)

    if is_secure_served():
        for iframe in soup.find_all("iframe"):
            if "src" not in iframe.attrs:
                continue
            iframe_src = urlparse(iframe.attrs["src"])
            if iframe_src.scheme != "http":
                continue
            if any(domain in iframe_src.netloc
                   for domain in HTTPS_IFRAME_DOMAINS):
                iframe.attrs["src"] = urlunparse(
                    iframe_src._replace(scheme="https"))

    return str(soup)
def make_absolute_redirect_uri(self, uri):
    """Make absolute redirect URIs

    internal redirect uris, e.g. `/user/foo/oauth_handler`
    are allowed in jupyterhub, but oauthlib prohibits them.
    Add `$HOST` header to redirect_uri to make them acceptable.

    Currently unused in favor of monkeypatching
    oauthlib.is_absolute_uri to skip the check

    :param uri: request URI whose ``redirect_uri`` query parameter may need
        to be made absolute
    :returns: ``uri`` unchanged when the ``redirect_uri`` argument is missing
        or does not start with ``/``; otherwise ``uri`` with its query string
        re-encoded and the first ``redirect_uri`` parameter replaced by the
        absolute URL
    """
    redirect_uri = self.get_argument('redirect_uri')
    if not (redirect_uri and redirect_uri.startswith('/')):
        return uri

    # make absolute local redirects full URLs
    # to satisfy oauthlib's absolute URI requirement
    absolute_redirect = "".join([
        self.request.protocol,
        "://",
        self.request.headers['Host'],
        redirect_uri,
    ])

    parsed_url = urlparse(uri)
    pairs = parse_qsl(parsed_url.query, keep_blank_values=True)
    for position, (key, _value) in enumerate(pairs):
        if key == 'redirect_uri':
            pairs[position] = ('redirect_uri', absolute_redirect)
            break

    return urlunparse(parsed_url._replace(query=urlencode(pairs)))
def _update_url_query_param(url, query_params): url_parts = parse.urlparse(url) old_qs_args = dict(parse.parse_qsl(url_parts[4])) old_qs_args.update(query_params) new_qs = parse.urlencode(old_qs_args) return parse.urlunparse( list(url_parts[0:4]) + [new_qs] + list(url_parts[5:]))
def connect(self, endpoint="", symbol="XBTN15", shouldAuth=True):
    '''Connect to the websocket and initialize data stores.

    The subscriptions are passed in the connection URL: ``quote``, ``trade``
    and ``instrument`` always, plus ``order``, ``execution``, ``margin`` and
    ``position`` when ``shouldAuth`` is set. The call returns once the
    initial data has arrived.

    :param endpoint: base URL; ``http`` in its scheme is replaced by ``ws``
        and its path by ``/realtime?subscribe=...``
    :param symbol: instrument symbol to subscribe to
    :param shouldAuth: also subscribe to the account-level topics
    '''
    self.logger.debug("Connecting WebSocket.")
    self.symbol = symbol
    self.shouldAuth = shouldAuth

    # We can subscribe right in the connection querystring.
    topics = [name + ':' + symbol for name in ("quote", "trade")]
    topics.append("instrument")  # We want all of them
    if self.shouldAuth:
        topics.extend(name + ':' + symbol for name in ("order", "execution"))
        topics.extend(["margin", "position"])

    # Get WS URL and connect.
    scheme, netloc, _path, params, query, fragment = urlparse(endpoint)
    wsURL = urlunparse((
        scheme.replace('http', 'ws'),
        netloc,
        "/realtime?subscribe=" + ",".join(topics),
        params,
        query,
        fragment,
    ))
    self.logger.info("Connecting to %s" % wsURL)
    self.__connect(wsURL)
    self.logger.info('Connected to WS. Waiting for data images, this may take a moment...')

    # Connected. Wait for partials
    self.__wait_for_symbol(symbol)
    if self.shouldAuth:
        self.__wait_for_account()
    self.logger.info('Got all market data. Starting.')
def clean_service_url(url):
    """
    Reduce ``url`` to its scheme and network location.

    Path, params, query string and fragment are dropped; the hostname and
    the optional port are kept.

    :param url: URL to clean
    :returns: ``scheme://netloc`` string
    """
    scheme, netloc = urlparse(url)[:2]
    return urlunparse((scheme, netloc, '', '', '', ''))
def _parseURL(url):
    """
    Normalize ``url`` and break it into protocol, host, port and path.

    :param url: URL to parse
    :returns: ``(proto, host, port, path)`` -- ``host`` lower-cased, ``port``
        a string of digits or ``''``, ``path`` including params, query and
        fragment -- or ``None`` when the URL cannot be normalized, the port
        is not numeric, or the host does not match ``host_segment_re``
    """
    try:
        url = urinorm.urinorm(url)
    except ValueError:
        return None

    proto, netloc, path, params, query, frag = urlparse(url)
    if not path:
        # Python <2.4 does not parse URLs with no path properly
        if not query and '?' in netloc:
            netloc, query = netloc.split('?', 1)
        path = '/'
    path = urlunparse(('', '', path, params, query, frag))

    host, colon, port = netloc.partition(':')
    if colon:
        # Exactly one colon is allowed, and it must be followed by digits.
        if ':' in port or not re.match(r'\d+$', port):
            return None

    host = host.lower()
    if not host_segment_re.match(host):
        return None

    return proto, host, port, path
def first_line(self):
    """
    Build the first line of the HTTP request: ``METHOD target VERSION``.

    The target is the relative URL (path, params, query and fragment, with
    an empty path shown as ``/``) unless a proxy is in use or the method is
    ``CONNECT``, in which case ``self.full_url`` is used.
    """
    through_proxy = bool(self._proxy) or self.method == self.CONNECT
    if through_proxy:
        target = self.full_url
    else:
        target = urlunparse(('', '', self.path or '/', self.params,
                             self.query, self.fragment))
    return '%s %s %s' % (self.method, target, self.version)
def output(self, key, obj):
    """
    Return the path of the URL generated for ``obj``.

    The URL values come from ``self._get_data(obj)`` plus
    ``self.key = getattr(obj, self.attribute)``; the endpoint is
    ``self.endpoint`` or, failing that, the endpoint of the current request.

    :param key: unused
    :param obj: object the URL is generated for
    :returns: the URL path, without query string or fragment
    """
    values = self._get_data(obj)
    values[self.key] = getattr(obj, self.attribute)
    endpoint = self.endpoint if self.endpoint else request.endpoint

    # Round-trip through urlparse to drop the query string.
    path_only = urlparse(url_for(endpoint, **values)).path
    return urlunparse(("", "", path_only, "", "", ""))
def relative_uri(source, target):
    """
    Make a relative URI from source to target.

    :param source: URI the result is relative to
    :param target: URI to express relative to ``source``
    :returns: ``target`` unchanged when it names a different scheme/netloc
        than ``source``; otherwise a relative reference that keeps the
        params, query and fragment of ``target``
    """
    src = urlparse.urlparse(source)
    dst = urlparse.urlparse(target)
    src_path, dst_path = src[2], dst[2]

    target_is_bare = dst[0] == '' and dst[1] == ''
    if not target_is_bare and src[0:2] != dst[0:2]:
        # Different scheme or host: nothing to relativize.
        return target

    if dst_path == src_path:
        relative = ''
    elif target_is_bare and not dst_path.startswith('/'):
        # Already a relative path.
        relative = dst_path
    else:
        relative = os.path.relpath(dst_path, os.path.dirname(src_path))

    if relative == '.':
        relative = ''

    return urlparse.urlunparse(["", "", relative] + list(dst[3:]))
def execute(self, operation, parameters=None):
    """Prepare and execute a database operation (query or command).

    The statement is POSTed to ``/v1/statement`` on the configured host and
    port, and the response is handed to ``self._process_response``.

    Return values are not defined.

    :param operation: SQL text, optionally with ``%`` placeholders
    :param parameters: values for the placeholders; escaped before being
        interpolated into ``operation``
    """
    headers = {
        'X-Presto-Catalog': self._catalog,
        'X-Presto-Schema': self._schema,
        'X-Presto-Source': self._source,
        'X-Presto-User': self._username,
    }
    if self._session_props:
        session_pairs = (
            '{}={}'.format(propname, propval)
            for propname, propval in self._session_props.items()
        )
        headers['X-Presto-Session'] = ','.join(session_pairs)

    # Prepare statement
    sql = operation
    if parameters is not None:
        sql = operation % _escaper.escape_args(parameters)

    self._reset_state()
    self._state = self._STATE_RUNNING

    netloc = '{}:{}'.format(self._host, self._port)
    url = urlparse.urlunparse(
        ('http', netloc, '/v1/statement', None, None, None))
    _logger.info('%s', sql)
    _logger.debug("Headers: %s", headers)
    response = requests.post(url, data=sql.encode('utf-8'), headers=headers)
    self._process_response(response)
def download(self, source, dest):
    """
    Download an archive file.

    Credentials embedded in an ``http``/``https`` URL are stripped from the
    URL and supplied through a basic-auth handler installed as the global
    urllib opener. If writing fails, a partially written ``dest`` is removed
    and the exception is re-raised.

    :param str source: URL pointing to an archive file.
    :param str dest: Local path location to download archive file to.
    """
    # propagate all exceptions
    # URLError, OSError, etc
    proto, netloc, path, params, query, fragment = urlparse(source)
    if proto in ('http', 'https'):
        auth, barehost = splituser(netloc)
        if auth is not None:
            source = urlunparse(
                (proto, barehost, path, params, query, fragment))
            username, password = splitpasswd(auth)
            passman = HTTPPasswordMgrWithDefaultRealm()
            # Realm is set to None in add_password to force the username
            # and password to be used whatever the realm
            passman.add_password(None, source, username, password)
            authhandler = HTTPBasicAuthHandler(passman)
            opener = build_opener(authhandler)
            install_opener(opener)
    response = urlopen(source)
    try:
        # Binary mode: ``response.read()`` returns bytes, and an archive
        # must be stored byte-for-byte (text mode raises TypeError on
        # Python 3 and could translate line endings elsewhere).
        with open(dest, 'wb') as dest_file:
            dest_file.write(response.read())
    except Exception:
        if os.path.isfile(dest):
            os.unlink(dest)
        raise
    finally:
        response.close()
def assign_lang(url,lang):
    """
    Assign a language to a dbpedia resource that does not have one.

    A netloc made of exactly three dot-separated parts is considered to
    carry a language already and the url is returned unchanged; otherwise
    ``lang`` is prepended to the netloc.

    EXAMPLE
    =======
    >>> assign_lang('http://dbpedia.org/resource/Montreal','en')
    'http://en.dbpedia.org/resource/Montreal'

    >>> assign_lang('http://fr.dbpedia.org/resource/Montreal','en')
    'http://fr.dbpedia.org/resource/Montreal'
    """
    parsed = urlparse(url)
    if len(parsed.netloc.split(".")) == 3:
        return url
    localized = parsed._replace(netloc=".".join([lang, parsed.netloc]))
    return urlunparse(localized)
def add_url(self,):
    """
    Add url(s) to the upload list, using a text box.

    Each non-blank line of the entered text is treated as one url. Lines
    without a scheme are assumed to be ``http``. Only ``http``, ``https``
    and ``ftp`` urls are added; every rejected line is listed in a warning
    dialog.
    """
    url_input = UrlInput()
    code = url_input.exec_()
    urls = url_input.text()

    new_entrys = []
    not_added = []

    if code and urls != '':
        for line in urls.split('\n'):
            # Strip surrounding whitespace (including a trailing '\r' from
            # pasted Windows line endings) and skip empty lines.
            url = line.strip()
            if not url:
                continue
            parsed_url = urlparse(url)
            if not parsed_url.scheme:
                # urlparse only recognises a host after '//'. Defaulting
                # just the scheme would turn 'example.com/x' into
                # 'http:///example.com/x', so re-parse with an explicit
                # prefix instead.
                parsed_url = urlparse('http://' + url.lstrip('/'))
            scheme = parsed_url.scheme.lower()
            if scheme in ['http', 'https', 'ftp']:
                new_entrys.append((urlunparse(parsed_url), 'url'))
            else:
                not_added.append(url)

        if not_added:
            message = QMessageBox(QMessageBox.Warning, 'Fehler',
                                  ('Ein oder mehrere link(s) konnten '
                                   'nicht hinzugefügt werden.'),
                                  buttons=QMessageBox.Ok, parent=self)
            message.setDetailedText('\n'.join(not_added))
            # The dialog was built but never displayed before.
            message.exec_()

    self.list_view_files_model.add_files(new_entrys)
def purge(self, url):
    """
    Send a purge request for ``url`` to the HTTP cache.

    The request goes to ``self.cache_scheme``/``self.cache_netloc`` with the
    path, params, query and fragment of ``url``; the original host (and
    port, when set) is sent in the ``Host`` header. HTTP and URL errors are
    logged, not raised.

    :param url: URL to purge from the cache
    """
    url_parsed = urlparse(url)

    # Append port to host if it is set in the original URL
    host = url_parsed.hostname
    port = url_parsed.port
    if port:
        host = host + ':' + str(port)

    cache_url = urlunparse(url_parsed._replace(
        scheme=self.cache_scheme, netloc=self.cache_netloc))
    headers = {
        'Host': host,
        'User-Agent': 'Wagtail-frontendcache/' + __version__,
    }
    request = PurgeRequest(url=cache_url, headers=headers)

    try:
        urlopen(request)
    except HTTPError as e:
        logger.error("Couldn't purge '%s' from HTTP cache. HTTPError: %d %s", url, e.code, e.reason)
    except URLError as e:
        logger.error("Couldn't purge '%s' from HTTP cache. URLError: %s", url, e.reason)
def normalized(self):
    """
    Reassemble the URL from its six components.

    :returns: URL string built from ``scheme``, ``netloc``, ``path``,
        ``params``, ``query`` and ``fragment`` of this object
    """
    components = [
        self.scheme,
        self.netloc,
        self.path,
        self.params,
        self.query,
        self.fragment,
    ]
    return urlparse.urlunparse(components)
def update_tracker(session_token, download_id, tracker):
    """
    Move a download's tracker to ``NEW_TRACKER_HOST``.

    The new announce URL is the old one with its host replaced. It is added
    to the download, the previous tracker is removed, and the new tracker is
    then enabled.

    :param session_token: value sent in the ``X-Fbx-App-Auth`` header
    :param download_id: numeric id of the download
    :param tracker: tracker description; only its ``announce`` URL is read
    """
    announce_url = tracker['announce']
    parts = list(urlparse(announce_url))
    parts[1] = NEW_TRACKER_HOST
    new_announce = urlunparse(parts)
    print("> UPDATE tracker %s ==> %s" % (announce_url, new_announce))

    # add new tracker
    url = MAFREEBOX_API_URL + ("downloads/%d/trackers" % download_id)
    rep = requests.post(url, json={
        'announce': new_announce,
        'is_enabled': True
    }, headers={
        'X-Fbx-App-Auth': session_token
    })
    get_api_result(rep)

    # remove prev tracker
    url = MAFREEBOX_API_URL + ("downloads/%d/trackers/%s" % (download_id, quote(announce_url, safe='')))
    rep = requests.delete(url, headers={
        'X-Fbx-App-Auth': session_token
    })
    get_api_result(rep)

    # activate new tracker: this is an update, so it must be a PUT. The
    # previous DELETE removed the tracker that had just been added.
    url = MAFREEBOX_API_URL + ("downloads/%d/trackers/%s" % (download_id, quote(new_announce, safe='')))
    rep = requests.put(url, json={
        'is_enabled': True
    }, headers={
        'X-Fbx-App-Auth': session_token
    })
    get_api_result(rep)
def __init__(self, **kwargs):
    """Create the Tree from SVG ``text``.

    Keyword arguments read: ``bytestring``, ``file_obj``, ``url``,
    ``unsafe``, ``parent``, ``parent_children``, ``tree_cache`` and
    ``url_fetcher`` (defaults to ``fetch``).

    The input is taken from the first of ``bytestring``, ``file_obj`` or
    ``url`` that is not ``None``; ``TypeError`` is raised when all three are
    missing, or when the url carries a fragment that matches no element id.
    """
    bytestring = kwargs.get('bytestring')
    file_obj = kwargs.get('file_obj')
    url = kwargs.get('url')
    unsafe = kwargs.get('unsafe')
    parent = kwargs.get('parent')
    parent_children = kwargs.get('parent_children')
    tree_cache = kwargs.get('tree_cache')
    element_id = None

    self.url_fetcher = kwargs.get('url_fetcher', fetch)

    if bytestring is not None:
        # Content given directly; ``url`` only records where it came from.
        self.url = url
    elif file_obj is not None:
        bytestring = file_obj.read()
        self.url = getattr(file_obj, 'name', None)
        # A file object named '<stdin>' has no usable url.
        if self.url == '<stdin>':
            self.url = None
    elif url is not None:
        # Resolve the url against the parent's url, when there is a parent.
        parent_url = parent.url if parent else None
        parsed_url = parse_url(url, parent_url)
        if parsed_url.fragment:
            # The fragment selects an element by id; the tree url is the
            # url without its fragment.
            self.url = urlunparse(parsed_url[:-1] + ('',))
            element_id = parsed_url.fragment
        else:
            self.url = parsed_url.geturl()
            element_id = None
        self.url = self.url or None
    else:
        raise TypeError(
            'No input. Use one of bytestring, file_obj or url.')

    if parent and self.url == parent.url:
        # Same document as the parent: reuse the XML tree of the topmost
        # ancestor instead of fetching and parsing again.
        root_parent = parent
        while root_parent.parent:
            root_parent = root_parent.parent
        tree = root_parent.xml_tree
    else:
        if not bytestring:
            bytestring = self.fetch_url(
                parse_url(self.url), 'image/svg+xml')
        # 0x1f 0x8b are the gzip magic bytes: decompress such content.
        if len(bytestring) >= 2 and bytestring[:2] == b'\x1f\x8b':
            bytestring = gzip.decompress(bytestring)
        # Entity resolution and huge trees are enabled only when ``unsafe``
        # is set.
        parser = ElementTree.XMLParser(
            resolve_entities=unsafe, huge_tree=unsafe)
        tree = ElementTree.fromstring(bytestring, parser)
    remove_svg_namespace(tree)
    self.xml_tree = tree
    apply_stylesheets(self)
    if element_id:
        # Narrow the tree down to the element named by the url fragment.
        self.xml_tree = tree.find(".//*[@id='{}']".format(element_id))
        if self.xml_tree is None:
            raise TypeError(
                'No tag with id="{}" found.'.format(element_id))
    super().__init__(
        self.xml_tree, self.url_fetcher, parent, parent_children, self.url)
    self.root = True
    # Register the tree in the cache, keyed by (url, id of the root element).
    if tree_cache is not None and self.url:
        tree_cache[(self.url, self.get('id'))] = self
def url_for(
    self,
    view_name: str,
    _anchor: str = "",
    _external: bool = False,
    _scheme: str = "",
    _server: str = None,
    _method: object = None,
    **kwargs
):
    # ? i think this should be in the Router
    """
    Build the URL of the route registered under ``view_name``.

    Keyword arguments whose name appears in the route URI behind a ``:``,
    ``|`` or ``*`` are substituted into the URI; the remaining keyword
    arguments are encoded into the query string.

    :param view_name: name used to look the route up in ``self.router``
    :param _anchor: fragment appended to the URL
    :param _external: build a full URL including scheme and server
    :param _scheme: scheme of the URL; requires ``_external``. When empty
        and ``_external`` is set, it is taken from the start of ``_server``
        or defaults to ``http``
    :param _server: server name; when ``None`` and ``_external`` is set, the
        ``SERVER_NAME`` config value (default ``""``) is used
    :param _method: not used by this implementation
    :param kwargs: URI parameters and query-string arguments
    :returns: the URL as a string
    :raises URLBuildError: when no route has that name, a parameter value
        contains ``:``, ``|`` or ``*``, or the URI still contains ``:`` or
        ``*`` after substitution
    :raises ValueError: when ``_scheme`` is given without ``_external``
    """
    uri, route = self.router.find_route_by_view_name(view_name)

    if not (uri and route):
        raise URLBuildError(
            "Endpoint with name `{}` was not found".format(view_name)
        )

    if _scheme and not _external:
        raise ValueError("When specifying _scheme, _external must be True")

    if _server is None and _external:
        _server = self.config.get("SERVER_NAME", "")

    if _external:
        if not _scheme:
            # Only the first 8 characters of the server name are inspected
            # for a scheme prefix.
            if ":" in _server[:8]:
                _scheme = _server[:8].split(":", 1)[0]
            else:
                _scheme = "http"

        # Drop the scheme prefix from the server name, if it has one.
        if "://" in _server[:8]:
            _server = _server.split("://", 1)[-1]

    orig_uri = uri
    replaced_vars = []
    for k in kwargs:
        # Values containing the placeholder markers are rejected.
        if re.search(r"([:|\*])", str(kwargs[k])):
            raise URLBuildError(
                "The parameter '{}' passed for URL `{}` with the value of "
                "'{}' may contain invalid characters that can break the "
                "URL".format(k, orig_uri, kwargs[k])
            )
        # NOTE(review): ``k`` is interpolated into the pattern unescaped and
        # the match is not anchored at the end of the name, so it looks like
        # a name that is a prefix of another placeholder would also match --
        # confirm against the router's placeholder syntax.
        m = re.search(r"([:|\*]{})".format(k), uri)
        if m:
            replaced_vars.append(k)
            uri = uri.replace(m.group(0), str(kwargs[k]))
        # else log ?

    # Substituted parameters must not show up in the query string as well.
    for k in replaced_vars:
        del kwargs[k]

    # Any marker left at this point is a placeholder that got no value.
    if uri.find(":") > -1 or uri.find("*") > -1:
        raise URLBuildError(
            "Required parameters for URL `{}` was not passed to "
            "url_for".format(orig_uri)
        )

    # parse the remainder of the keyword arguments into a querystring
    query_string = urlencode(kwargs, doseq=True) if kwargs else ""
    # scheme://netloc/path;parameters?query#fragment
    return urlunparse((_scheme, _server, uri, "", query_string, _anchor))
from urllib.parse import urlunparse #长度必须为6 如果是split 长度必须是5 data = ['http', 'www.baidu.com', 'index.html', 'user', 'a=6', 'comment'] print(urlunparse(data)) #http://www.baidu.com/index.html;user?a=6#comment
def __init__(self, hub, name=None, description=None, metadata=None,
             addr=None, port=0, callable=True):
    """
    Set up the client state and, for callable clients, its XML-RPC server.

    :param hub: stored as ``self.hub``
    :param name: when given, stored in the metadata as ``samp.name``
    :param description: when given, stored in the metadata as
        ``samp.description.text``
    :param metadata: metadata dictionary; a new one is created when ``None``
    :param addr: address to listen on; the detected host name is used when
        it is not set
    :param port: port to listen on; ``0`` lets the operating system choose
    :param callable: whether to create the XML-RPC server and its thread
    """
    # GENERAL
    self._is_running = False
    self._is_registered = False

    metadata = {} if metadata is None else metadata
    optional_entries = (
        ("samp.name", name),
        ("samp.description.text", description),
    )
    for key, value in optional_entries:
        if value is not None:
            metadata[key] = value
    self._metadata = metadata

    self._addr = addr
    self._port = port
    self._xmlrpcAddr = None
    self._callable = callable

    # HUB INTERACTION
    self.client = None
    self._public_id = None
    self._private_key = None
    self._hub_id = None
    self._notification_bindings = {}
    self._call_bindings = {
        "samp.app.ping": [self._ping, {}],
        "client.env.get": [self._client_env_get, {}],
    }
    self._response_bindings = {}

    # Use the fully qualified host name when it resolves, the loopback
    # address otherwise.
    self._host_name = "127.0.0.1"
    if internet_on():
        try:
            self._host_name = socket.getfqdn()
            socket.getaddrinfo(self._addr or self._host_name,
                               self._port or 0)
        except socket.error:
            self._host_name = "127.0.0.1"

    self.hub = hub

    if not self._callable:
        return

    self._thread = threading.Thread(target=self._serve_forever)
    self._thread.daemon = True

    self.client = ThreadingXMLRPCServer(
        (self._addr or self._host_name, self._port),
        logRequests=False, allow_none=True)
    self.client.register_introspection_functions()
    rpc_handlers = (
        (self.receive_notification, 'samp.client.receiveNotification'),
        (self.receive_call, 'samp.client.receiveCall'),
        (self.receive_response, 'samp.client.receiveResponse'),
    )
    for handler, rpc_name in rpc_handlers:
        self.client.register_function(handler, rpc_name)

    # If the port was set to zero, then the operating system has
    # selected a free port. We now check what this port number is.
    if self._port == 0:
        self._port = self.client.socket.getsockname()[1]

    protocol = 'http'
    netloc = '{0}:{1}'.format(self._addr or self._host_name, self._port)
    self._xmlrpcAddr = urlunparse((protocol, netloc, '', '', '', ''))
def _login_redirect_url(target, login_url_parts, redirect_field_name=REDIRECT_FIELD_NAME):
    """
    Build the login URL that redirects back to ``target`` afterwards.

    ``login_url_parts`` is modified in place: its query component (index 4)
    gets the redirect field set to ``target``.

    :param target: URL to return to after login
    :param login_url_parts: mutable six-item sequence of URL components
    :param redirect_field_name: name of the query parameter holding the
        target
    :returns: the login URL as a string
    """
    login_query = QueryDict(login_url_parts[4], mutable=True)
    login_query[redirect_field_name] = target
    # '/' is left unescaped in the encoded query string.
    login_url_parts[4] = login_query.urlencode(safe='/')
    return urlunparse(login_url_parts)
def urlclean(url):
    """
    Remove fragment, params, and querystring from `url` if present.

    :param url: URL to clean
    :returns: URL made of scheme, netloc and path only
    """
    stripped = urlparse(url)._replace(params='', query='', fragment='')
    return urlunparse(stripped)
def _get_url(self, identity_api_version): url = parse.urlparse(self.cli_clients.uri) return parse.urlunparse( (url.scheme, url.netloc, '/identity/v%s' % identity_api_version, url.params, url.query, url.fragment))
def relative_location(response):
    """
    Return the location of ``response`` without scheme and host.

    :param response: object with a ``location`` URL attribute
    :returns: path, params, query and fragment of the location
    """
    location = urlparse(response.location)
    return urlunparse(location._replace(scheme='', netloc=''))
def clean_link(self, link):
    """
    Tidy the path of ``link``.

    Every ``./`` in the path is replaced by ``/`` and, after that, every
    ``//`` by ``/``. The other URL components are left untouched.

    :param link: URL to clean
    :returns: the URL with the tidied path
    """
    parsed = urlparse(link)
    tidy_path = parsed.path.replace("./", "/").replace("//", "/")
    return urlunparse(parsed._replace(path=tidy_path))
def _connect_coro(self):
    """
    Connect to the broker named by ``self.session.broker_uri``.

    The URI scheme selects the transport: ``mqtt``/``mqtts`` open a plain
    stream connection, ``ws``/``wss`` a websocket. ``mqtts`` and ``wss``
    require a CA file on the session and use TLS. Missing ports default to
    1883 (``mqtt``), 8883 (``mqtts``), 80 (``ws``) and 443 (``wss``).

    :returns: the return code of the MQTT connect exchange
    :raises ClientException: when TLS is required but no CA file is set
    :raises ConnectException: when the broker rejects the connection (the
        exception then carries ``return_code``), the URI is invalid, the
        websocket handshake fails, or the connection cannot be established
    """
    kwargs = dict()

    # Decode URI attributes
    uri_attributes = urlparse(self.session.broker_uri)
    scheme = uri_attributes.scheme
    secure = scheme in ('mqtts', 'wss')
    self.session.username = uri_attributes.username
    self.session.password = uri_attributes.password
    self.session.remote_address = uri_attributes.hostname
    self.session.remote_port = uri_attributes.port
    if scheme in ('mqtt', 'mqtts') and not self.session.remote_port:
        self.session.remote_port = 8883 if scheme == 'mqtts' else 1883
    if scheme in ('ws', 'wss') and not self.session.remote_port:
        self.session.remote_port = 443 if scheme == 'wss' else 80
    if scheme in ('ws', 'wss'):
        # Rewrite URI to conform to https://tools.ietf.org/html/rfc6455#section-3
        uri = (scheme,
               self.session.remote_address + ":" + str(self.session.remote_port),
               uri_attributes[2], uri_attributes[3], uri_attributes[4],
               uri_attributes[5])
        self.session.broker_uri = urlunparse(uri)

    # Init protocol handler
    #if not self._handler:
    self._handler = ClientProtocolHandler(self.plugins_manager, loop=self._loop)

    if secure:
        if self.session.cafile is None or self.session.cafile == '':
            self.logger.warning(
                "TLS connection can't be estabilshed, no certificate file (.cert) given"
            )
            raise ClientException(
                "TLS connection can't be estabilshed, no certificate file (.cert) given"
            )
        sc = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                        cafile=self.session.cafile,
                                        capath=self.session.capath,
                                        cadata=self.session.cadata)
        if 'certfile' in self.config and 'keyfile' in self.config:
            sc.load_cert_chain(self.config['certfile'], self.config['keyfile'])
        if 'check_hostname' in self.config and isinstance(
                self.config['check_hostname'], bool):
            sc.check_hostname = self.config['check_hostname']
        kwargs['ssl'] = sc

    try:
        reader = None
        writer = None
        self._connected_state.clear()
        # Open connection
        if scheme in ('mqtt', 'mqtts'):
            conn_reader, conn_writer = \
                yield from asyncio.open_connection(
                    self.session.remote_address,
                    self.session.remote_port,
                    loop=self._loop, **kwargs)
            reader = StreamReaderAdapter(conn_reader)
            writer = StreamWriterAdapter(conn_writer)
        elif scheme in ('ws', 'wss'):
            websocket = yield from websockets.connect(
                self.session.broker_uri,
                subprotocols=['mqtt'],
                loop=self._loop,
                **kwargs)
            reader = WebSocketsReader(websocket)
            writer = WebSocketsWriter(websocket)
        # Start MQTT protocol
        self._handler.attach(self.session, reader, writer)
        return_code = yield from self._handler.mqtt_connect()
        if return_code is not CONNECTION_ACCEPTED:
            self.session.transitions.disconnect()
            self.logger.warning("Connection rejected with code '%s'" % return_code)
            exc = ConnectException("Connection rejected by broker")
            exc.return_code = return_code
            raise exc
        else:
            # Handle MQTT protocol
            yield from self._handler.start()
            self.session.transitions.connect()
            self._connected_state.set()
            self.logger.debug(
                "connected to %s:%s" % (self.session.remote_address,
                                        self.session.remote_port))
        return return_code
    except InvalidURI as iuri:
        self.logger.warning("connection failed: invalid URI '%s'" %
                            self.session.broker_uri)
        self.session.transitions.disconnect()
        raise ConnectException(
            "connection failed: invalid URI '%s'" % self.session.broker_uri,
            iuri)
    except InvalidHandshake as ihs:
        self.logger.warning("connection failed: invalid websocket handshake")
        self.session.transitions.disconnect()
        raise ConnectException(
            "connection failed: invalid websocket handshake", ihs)
    except (ProtocolHandlerException, ConnectionError, OSError) as e:
        self.logger.warning("MQTT connection failed: %r" % e)
        self.session.transitions.disconnect()
        raise ConnectException(e)
def _request(self, url, data=None, method=None):
    """
    Send an HTTP request to the remote server.

    :Args:
     - url - The URL to send the request to. Credentials embedded in the
       URL are stripped from it and supplied through HTTP basic auth.
     - data - The message body to send as a string, or None for no body.
     - method - A string for the HTTP method to send the request with.

    :Returns:
      A dictionary with the server's parsed JSON response.
    """
    LOGGER.debug('%s %s %s' % (method, url, data))

    # ``data`` defaults to None; only encode when there is a body to send.
    payload = data.encode('utf-8') if data is not None else None

    parsed_url = parse.urlparse(url)
    password_manager = None
    if parsed_url.username:
        netloc = parsed_url.hostname
        if parsed_url.port:
            netloc += ":%s" % parsed_url.port
        cleaned_url = parse.urlunparse((parsed_url.scheme,
                                        netloc,
                                        parsed_url.path,
                                        parsed_url.params,
                                        parsed_url.query,
                                        parsed_url.fragment))
        password_manager = url_request.HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(None,
                                      "%s://%s" % (parsed_url.scheme, netloc),
                                      parsed_url.username,
                                      parsed_url.password)
        request = Request(cleaned_url, data=payload, method=method)
    else:
        request = Request(url, data=payload, method=method)

    request.add_header('Accept', 'application/json')
    request.add_header('Content-Type', 'application/json;charset=UTF-8')

    if password_manager:
        opener = url_request.build_opener(url_request.HTTPRedirectHandler(),
                                          HttpErrorHandler(),
                                          url_request.HTTPBasicAuthHandler(password_manager))
    else:
        opener = url_request.build_opener(url_request.HTTPRedirectHandler(),
                                          HttpErrorHandler())
    response = opener.open(request)
    try:
        # 4xx responses are passed back raw (undecoded) with the HTTP code.
        if response.code > 399 and response.code < 500:
            return {'status': response.code, 'value': response.read()}
        body = response.read().decode('utf-8').replace('\x00', '').strip()
        content_type = [value for name, value in list(response.info().items())
                        if name.lower() == "content-type"]
        if not any([x.startswith('image/png') for x in content_type]):
            try:
                data = utils.load_json(body.strip())
            except ValueError:
                # Not JSON: derive the status from the HTTP code instead.
                if response.code > 199 and response.code < 300:
                    status = ErrorCode.SUCCESS
                else:
                    status = ErrorCode.UNKNOWN_ERROR
                return {'status': status, 'value': body.strip()}

            assert type(data) is dict, (
                'Invalid server response body: %s' % body)
            assert 'status' in data, (
                'Invalid server response; no status: %s' % body)
            # Some of the drivers incorrectly return a response
            # with no 'value' field when they should return null.
            if 'value' not in data:
                data['value'] = None
            return data
        else:
            data = {'status': 0, 'value': body.strip()}
            return data
    finally:
        response.close()
def get_query_url(query_self_url, netloc):
    """Return ``query_self_url`` re-hosted on ``netloc``.

    Only the scheme and path of the input URL are kept; params, query and
    fragment are dropped and the network location is replaced by ``netloc``.
    """
    # A parse result is a 6-tuple; the first three fields are
    # (scheme, netloc, path) and the original netloc is discarded.
    scheme, _, path = urlparse(query_self_url)[:3]
    return urlunparse((scheme, netloc, path, '', '', ''))
def make_lang_url(lang: str, rel_link: str):
    """Build an ``http`` URL for ``rel_link`` on the host registered for ``lang``.

    The host is looked up in ``LANG_HOSTS``; an unknown ``lang`` raises
    ``KeyError``.
    """
    host = LANG_HOSTS[lang]
    components = ("http", host, rel_link, "", "", "")
    return urlunparse(components)
def make_url(filters_dict, url_as_list, get_args):
    """Assemble a URL whose query string carries the serialised filters.

    Both ``get_args`` (gains/overwrites the ``"filters"`` key) and
    ``url_as_list`` (its query slot, index 4, is overwritten) are modified
    in place.
    """
    serialised_filters = filters_string_from_dict(filters_dict)
    get_args["filters"] = serialised_filters
    # doseq=True expands list values into repeated query parameters.
    query = urlencode(get_args, doseq=True)
    url_as_list[4] = query
    return urlunparse(url_as_list)
def get_local_uri(self, uri):
    """Return ``uri`` with its network location pointed at localhost.

    The original netloc is handed to ``self.port_forward`` and whatever that
    returns is used as the port in ``localhost:<port>``; every other
    component of the URI is kept.
    """
    parts = urlparse(uri)
    forwarded = self.port_forward(parts.netloc)
    local_netloc = 'localhost:{}'.format(forwarded)
    return urlunparse(parts._replace(netloc=local_netloc))
def _request(self, method, url, body=None):
    """
    Send an HTTP request to the remote server.

    :Args:
     - method - A string for the HTTP method to send the request with.
     - url - A string for the URL to send the request to.
     - body - A string for request body, or None. On the keep-alive path it
       is ignored unless method is POST or PUT.

    :Returns:
      A dictionary with the server's parsed JSON response.
    """
    LOGGER.debug('%s %s %s' % (method, url, body))

    parsed_url = parse.urlparse(url)

    if self.keep_alive:
        headers = {"Connection": 'keep-alive', method: parsed_url.path,
                   "User-Agent": "Python http auth",
                   "Content-type": "application/json;charset=\"UTF-8\"",
                   "Accept": "application/json"}
        if parsed_url.username:
            auth = base64.standard_b64encode(
                ('%s:%s' % (parsed_url.username, parsed_url.password)).encode('ascii')
            ).decode('ascii').replace('\n', '')
            headers["Authorization"] = "Basic %s" % auth
        if body and method != 'POST' and method != 'PUT':
            body = None
        try:
            self._conn.request(method, parsed_url.path, body, headers)
            resp = self._conn.getresponse()
        except (httplib.HTTPException, socket.error):
            self._conn.close()
            raise

        statuscode = resp.status
    else:
        # The redirect branch below recurses with body=None, so the body
        # must only be encoded when one is actually present.
        payload = body.encode('utf-8') if body is not None else None

        password_manager = None
        if parsed_url.username:
            netloc = parsed_url.hostname
            if parsed_url.port:
                netloc += ":%s" % parsed_url.port
            cleaned_url = parse.urlunparse((parsed_url.scheme,
                                            netloc,
                                            parsed_url.path,
                                            parsed_url.params,
                                            parsed_url.query,
                                            parsed_url.fragment))
            password_manager = url_request.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(None,
                                          "%s://%s" % (parsed_url.scheme, netloc),
                                          parsed_url.username,
                                          parsed_url.password)
            request = Request(cleaned_url, data=payload, method=method)
        else:
            request = Request(url, data=payload, method=method)

        request.add_header('Accept', 'application/json')
        request.add_header('Content-Type', 'application/json;charset=UTF-8')

        if password_manager:
            opener = url_request.build_opener(url_request.HTTPRedirectHandler(),
                                              HttpErrorHandler(),
                                              url_request.HTTPBasicAuthHandler(password_manager))
        else:
            opener = url_request.build_opener(url_request.HTTPRedirectHandler(),
                                              HttpErrorHandler())
        resp = opener.open(request, timeout=self._timeout)
        statuscode = resp.code
        # Give the response a uniform ``getheader`` accessor when it lacks one.
        if not hasattr(resp, 'getheader'):
            if hasattr(resp.headers, 'getheader'):
                resp.getheader = lambda x: resp.headers.getheader(x)
            elif hasattr(resp.headers, 'get'):
                resp.getheader = lambda x: resp.headers.get(x)

    data = resp.read()
    try:
        if 300 <= statuscode < 304:
            # Follow the redirect with a body-less GET.
            return self._request('GET', resp.getheader('location'))
        body = data.decode('utf-8').replace('\x00', '').strip()
        if 399 < statuscode <= 500:
            return {'status': statuscode, 'value': body}
        content_type = []
        if resp.getheader('Content-Type') is not None:
            content_type = resp.getheader('Content-Type').split(';')
        if not any([x.startswith('image/png') for x in content_type]):
            try:
                data = utils.load_json(body.strip())
            except ValueError:
                # Not JSON: derive the status from the HTTP code instead.
                if 199 < statuscode < 300:
                    status = ErrorCode.SUCCESS
                else:
                    status = ErrorCode.UNKNOWN_ERROR
                return {'status': status, 'value': body.strip()}

            assert type(data) is dict, (
                'Invalid server response body: %s' % body)
            # Some of the drivers incorrectly return a response
            # with no 'value' field when they should return null.
            if 'value' not in data:
                data['value'] = None
            return data
        else:
            data = {'status': 0, 'value': body.strip()}
            return data
    finally:
        LOGGER.debug("Finished Request")
        resp.close()
def as_str(self):
    """Return this URL serialised as a string.

    The components come from ``self.to_pr()`` and are joined with
    ``urlunparse``.
    """
    components = self.to_pr()
    return urlunparse(components)
def add_token(url, login):
    """Return ``url`` with its ``user_token`` query parameter set to ``login``.

    Existing query parameters are preserved (including repeated keys and
    keys with empty values); any previous ``user_token`` is replaced.

    :param url: the URL to modify
    :param login: the value to store under ``user_token``
    :returns: the rebuilt URL string
    """
    parsed = parse.urlparse(url)
    # keep_blank_values so that parameters such as ``?flag=`` are not lost.
    qs = parse.parse_qs(parsed.query, keep_blank_values=True)
    qs['user_token'] = login
    # parse_qs maps every key to a *list* of values; doseq=True writes each
    # element as its own ``key=value`` pair instead of encoding the list's
    # repr (``a=%5B%271%27%5D``).
    new_url = parsed._replace(query=parse.urlencode(qs, doseq=True))
    return parse.urlunparse(new_url)
def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
    """Set up the connection to the remote server and the command table.

    :Args:
     - remote_server_addr - URL of the remote server.
     - keep_alive - When true, a persistent ``HTTPConnection`` is opened and
       stored on ``self._conn``.
     - resolve_ip - When true, the hostname is resolved up front via
       ``common_utils.find_connectable_ip`` and the stored URL is rewritten
       to use the resulting IP address.
    """
    # Attempt to resolve the hostname and get an IP address.
    self.keep_alive = keep_alive
    parsed_url = parse.urlparse(remote_server_addr)
    addr = parsed_url.hostname
    if parsed_url.hostname and resolve_ip:
        port = parsed_url.port or None
        ip = common_utils.find_connectable_ip(parsed_url.hostname,
                                              port=port)
        if ip:
            netloc = ip
            addr = netloc
            if parsed_url.port:
                netloc = common_utils.join_host_port(netloc,
                                                     parsed_url.port)
            if parsed_url.username:
                # Re-attach the credentials that were part of the original netloc.
                auth = parsed_url.username
                if parsed_url.password:
                    auth += ':%s' % parsed_url.password
                netloc = '%s@%s' % (auth, netloc)
            remote_server_addr = parse.urlunparse(
                (parsed_url.scheme, netloc, parsed_url.path,
                 parsed_url.params, parsed_url.query, parsed_url.fragment))
        else:
            LOGGER.info('Could not get IP address for host: %s' %
                        parsed_url.hostname)

    self._url = remote_server_addr
    if keep_alive:
        # Pass the port through unchanged: ``str(None)`` would hand the
        # literal string 'None' to HTTPConnection when the URL has no
        # explicit port, whereas None lets it fall back to its default.
        self._conn = httplib.HTTPConnection(
            str(addr), parsed_url.port, timeout=self._timeout)

    # Maps each command to its (HTTP method, URL template) pair.
    self._commands = {
        Command.STATUS: ('GET', '/status'),
        Command.NEW_SESSION: ('POST', '/session'),
        Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
        Command.QUIT: ('DELETE', '/session/$sessionId'),
        Command.GET_CURRENT_WINDOW_HANDLE:
            ('GET', '/session/$sessionId/window_handle'),
        Command.GET_WINDOW_HANDLES:
            ('GET', '/session/$sessionId/window_handles'),
        Command.GET: ('POST', '/session/$sessionId/url'),
        Command.GO_FORWARD: ('POST', '/session/$sessionId/forward'),
        Command.GO_BACK: ('POST', '/session/$sessionId/back'),
        Command.REFRESH: ('POST', '/session/$sessionId/refresh'),
        Command.EXECUTE_SCRIPT: ('POST', '/session/$sessionId/execute'),
        Command.GET_CURRENT_URL: ('GET', '/session/$sessionId/url'),
        Command.GET_TITLE: ('GET', '/session/$sessionId/title'),
        Command.GET_PAGE_SOURCE: ('GET', '/session/$sessionId/source'),
        Command.SCREENSHOT: ('GET', '/session/$sessionId/screenshot'),
        Command.ELEMENT_SCREENSHOT: ('GET', '/session/$sessionId/screenshot/$id'),
        Command.FIND_ELEMENT: ('POST', '/session/$sessionId/element'),
        Command.FIND_ELEMENTS: ('POST', '/session/$sessionId/elements'),
        Command.GET_ACTIVE_ELEMENT:
            ('POST', '/session/$sessionId/element/active'),
        Command.FIND_CHILD_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/element'),
        Command.FIND_CHILD_ELEMENTS:
            ('POST', '/session/$sessionId/element/$id/elements'),
        Command.CLICK_ELEMENT: ('POST', '/session/$sessionId/element/$id/click'),
        Command.CLEAR_ELEMENT: ('POST', '/session/$sessionId/element/$id/clear'),
        Command.SUBMIT_ELEMENT: ('POST', '/session/$sessionId/element/$id/submit'),
        Command.GET_ELEMENT_TEXT: ('GET', '/session/$sessionId/element/$id/text'),
        Command.SEND_KEYS_TO_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/value'),
        Command.SEND_KEYS_TO_ACTIVE_ELEMENT:
            ('POST', '/session/$sessionId/keys'),
        Command.UPLOAD_FILE: ('POST', "/session/$sessionId/file"),
        Command.GET_ELEMENT_VALUE:
            ('GET', '/session/$sessionId/element/$id/value'),
        Command.GET_ELEMENT_TAG_NAME:
            ('GET', '/session/$sessionId/element/$id/name'),
        Command.IS_ELEMENT_SELECTED:
            ('GET', '/session/$sessionId/element/$id/selected'),
        Command.SET_ELEMENT_SELECTED:
            ('POST', '/session/$sessionId/element/$id/selected'),
        Command.IS_ELEMENT_ENABLED:
            ('GET', '/session/$sessionId/element/$id/enabled'),
        Command.IS_ELEMENT_DISPLAYED:
            ('GET', '/session/$sessionId/element/$id/displayed'),
        Command.GET_ELEMENT_LOCATION:
            ('GET', '/session/$sessionId/element/$id/location'),
        Command.GET_ELEMENT_LOCATION_ONCE_SCROLLED_INTO_VIEW:
            ('GET', '/session/$sessionId/element/$id/location_in_view'),
        Command.GET_ELEMENT_SIZE:
            ('GET', '/session/$sessionId/element/$id/size'),
        Command.GET_ELEMENT_RECT:
            ('GET', '/session/$sessionId/element/$id/rect'),
        Command.GET_ELEMENT_ATTRIBUTE:
            ('GET', '/session/$sessionId/element/$id/attribute/$name'),
        Command.ELEMENT_EQUALS:
            ('GET', '/session/$sessionId/element/$id/equals/$other'),
        Command.GET_ALL_COOKIES: ('GET', '/session/$sessionId/cookie'),
        Command.ADD_COOKIE: ('POST', '/session/$sessionId/cookie'),
        Command.DELETE_ALL_COOKIES:
            ('DELETE', '/session/$sessionId/cookie'),
        Command.DELETE_COOKIE:
            ('DELETE', '/session/$sessionId/cookie/$name'),
        Command.SWITCH_TO_FRAME: ('POST', '/session/$sessionId/frame'),
        Command.SWITCH_TO_PARENT_FRAME: ('POST', '/session/$sessionId/frame/parent'),
        Command.SWITCH_TO_WINDOW: ('POST', '/session/$sessionId/window'),
        Command.CLOSE: ('DELETE', '/session/$sessionId/window'),
        Command.GET_ELEMENT_VALUE_OF_CSS_PROPERTY:
            ('GET', '/session/$sessionId/element/$id/css/$propertyName'),
        Command.IMPLICIT_WAIT:
            ('POST', '/session/$sessionId/timeouts/implicit_wait'),
        Command.EXECUTE_ASYNC_SCRIPT: ('POST', '/session/$sessionId/execute_async'),
        Command.SET_SCRIPT_TIMEOUT:
            ('POST', '/session/$sessionId/timeouts/async_script'),
        Command.SET_TIMEOUTS:
            ('POST', '/session/$sessionId/timeouts'),
        Command.DISMISS_ALERT:
            ('POST', '/session/$sessionId/dismiss_alert'),
        Command.ACCEPT_ALERT:
            ('POST', '/session/$sessionId/accept_alert'),
        Command.SET_ALERT_VALUE:
            ('POST', '/session/$sessionId/alert_text'),
        Command.GET_ALERT_TEXT:
            ('GET', '/session/$sessionId/alert_text'),
        Command.SET_ALERT_CREDENTIALS:
            ('POST', '/session/$sessionId/alert/credentials'),
        Command.CLICK:
            ('POST', '/session/$sessionId/click'),
        Command.DOUBLE_CLICK:
            ('POST', '/session/$sessionId/doubleclick'),
        Command.MOUSE_DOWN:
            ('POST', '/session/$sessionId/buttondown'),
        Command.MOUSE_UP:
            ('POST', '/session/$sessionId/buttonup'),
        Command.MOVE_TO:
            ('POST', '/session/$sessionId/moveto'),
        Command.GET_WINDOW_SIZE:
            ('GET', '/session/$sessionId/window/$windowHandle/size'),
        Command.W3C_GET_WINDOW_SIZE:
            ('GET', '/session/$sessionId/window/size'),
        Command.SET_WINDOW_SIZE:
            ('POST', '/session/$sessionId/window/$windowHandle/size'),
        Command.W3C_SET_WINDOW_SIZE:
            ('POST', '/session/$sessionId/window/size'),
        Command.GET_WINDOW_POSITION:
            ('GET', '/session/$sessionId/window/$windowHandle/position'),
        Command.SET_WINDOW_POSITION:
            ('POST', '/session/$sessionId/window/$windowHandle/position'),
        Command.MAXIMIZE_WINDOW:
            ('POST', '/session/$sessionId/window/$windowHandle/maximize'),
        Command.W3C_MAXIMIZE_WINDOW:
            ('POST', '/session/$sessionId/window/maximize'),
        Command.SET_SCREEN_ORIENTATION:
            ('POST', '/session/$sessionId/orientation'),
        Command.GET_SCREEN_ORIENTATION:
            ('GET', '/session/$sessionId/orientation'),
        Command.SINGLE_TAP:
            ('POST', '/session/$sessionId/touch/click'),
        Command.TOUCH_DOWN:
            ('POST', '/session/$sessionId/touch/down'),
        Command.TOUCH_UP:
            ('POST', '/session/$sessionId/touch/up'),
        Command.TOUCH_MOVE:
            ('POST', '/session/$sessionId/touch/move'),
        Command.TOUCH_SCROLL:
            ('POST', '/session/$sessionId/touch/scroll'),
        Command.DOUBLE_TAP:
            ('POST', '/session/$sessionId/touch/doubleclick'),
        Command.LONG_PRESS:
            ('POST', '/session/$sessionId/touch/longclick'),
        Command.FLICK:
            ('POST', '/session/$sessionId/touch/flick'),
        Command.EXECUTE_SQL:
            ('POST', '/session/$sessionId/execute_sql'),
        Command.GET_LOCATION:
            ('GET', '/session/$sessionId/location'),
        Command.SET_LOCATION:
            ('POST', '/session/$sessionId/location'),
        Command.GET_APP_CACHE:
            ('GET', '/session/$sessionId/application_cache'),
        Command.GET_APP_CACHE_STATUS:
            ('GET', '/session/$sessionId/application_cache/status'),
        Command.CLEAR_APP_CACHE:
            ('DELETE', '/session/$sessionId/application_cache/clear'),
        Command.GET_NETWORK_CONNECTION:
            ('GET', '/session/$sessionId/network_connection'),
        Command.SET_NETWORK_CONNECTION:
            ('POST', '/session/$sessionId/network_connection'),
        Command.GET_LOCAL_STORAGE_ITEM:
            ('GET', '/session/$sessionId/local_storage/key/$key'),
        Command.REMOVE_LOCAL_STORAGE_ITEM:
            ('DELETE', '/session/$sessionId/local_storage/key/$key'),
        Command.GET_LOCAL_STORAGE_KEYS:
            ('GET', '/session/$sessionId/local_storage'),
        Command.SET_LOCAL_STORAGE_ITEM:
            ('POST', '/session/$sessionId/local_storage'),
        Command.CLEAR_LOCAL_STORAGE:
            ('DELETE', '/session/$sessionId/local_storage'),
        Command.GET_LOCAL_STORAGE_SIZE:
            ('GET', '/session/$sessionId/local_storage/size'),
        Command.GET_SESSION_STORAGE_ITEM:
            ('GET', '/session/$sessionId/session_storage/key/$key'),
        Command.REMOVE_SESSION_STORAGE_ITEM:
            ('DELETE', '/session/$sessionId/session_storage/key/$key'),
        Command.GET_SESSION_STORAGE_KEYS:
            ('GET', '/session/$sessionId/session_storage'),
        Command.SET_SESSION_STORAGE_ITEM:
            ('POST', '/session/$sessionId/session_storage'),
        Command.CLEAR_SESSION_STORAGE:
            ('DELETE', '/session/$sessionId/session_storage'),
        Command.GET_SESSION_STORAGE_SIZE:
            ('GET', '/session/$sessionId/session_storage/size'),
        Command.GET_LOG:
            ('POST', '/session/$sessionId/log'),
        Command.GET_AVAILABLE_LOG_TYPES:
            ('GET', '/session/$sessionId/log/types'),
        Command.CURRENT_CONTEXT_HANDLE:
            ('GET', '/session/$sessionId/context'),
        Command.CONTEXT_HANDLES:
            ('GET', '/session/$sessionId/contexts'),
        Command.SWITCH_TO_CONTEXT:
            ('POST', '/session/$sessionId/context'),
    }
def get_engine(dir=None, name=None):
    """Create an engine for the SQLite database located via ``db_path``.

    ``dir`` and ``name`` are forwarded unchanged to ``db_path``; the path it
    returns is wrapped into a ``sqlite`` URL that is logged at debug level
    and handed to ``create_engine``.
    """
    database_path = db_path(dir, name)
    components = ('sqlite', '/', database_path, '', '', '')
    engine_url = urlunparse(components)
    LOG.debug('Creating engine for {0}.'.format(engine_url))
    return create_engine(engine_url)
def url_to_string(url):
    """Join the values of the mapping ``url`` into a URL string.

    The values are used in the mapping's iteration order, so they must be
    the six ``urlunparse`` components (scheme, netloc, path, params, query,
    fragment) in that order.
    """
    components = tuple(url.values())
    return urlunparse(components)
def process(self, url):
    """Fetch one page and enqueue every new link found on it.

    ``url`` is a ``(position, url_name)`` record; ``position`` is the crawl
    depth of the page (0 .. depth - 1).  Pages at ``self.depth`` or deeper
    are not fetched.  Each ``<a href>`` is normalised to a full URL; URLs
    not yet present in ``self.all_urls`` are recorded there, put on
    ``self.urls_queue`` one level deeper, and finally reported together on
    ``self.res_url_queue``.

    ``URLError`` and ``ValueError`` (e.g. a link with a host but no scheme)
    abort processing of this page and are logged when ``self.log`` is set.
    """
    try:
        # set of local urls
        urls = set()
        # check that the crawler is still lower then depth level
        if url.position < self.depth:
            url_handler = Req.urlopen(url.url_name)
            try:
                html_from_url = url_handler.read()
            finally:
                # Release the connection even if reading fails.
                url_handler.close()
            html_souped = BeautifulSoup(html_from_url)
            for a in html_souped.find_all('a'):
                parsed_url = urlparse(a.get('href'))
                try:
                    if parsed_url.netloc:
                        if not parsed_url.scheme:
                            raise ValueError('Scheme not given for url ' + parsed_url.netloc)
                        url_to_crawl = urlunparse(parsed_url)
                    elif parsed_url.path:
                        if parsed_url.query:
                            url_to_crawl = urljoin(url.url_name, urlunparse(parsed_url))
                        else:
                            for index in INDEX:
                                if index in parsed_url.path:
                                    raise CrawlerExceptions.IndexPageException(
                                        'Duplicate url: ' + str(urljoin(url.url_name, urlunparse(parsed_url))))
                            else:
                                url_to_crawl = str(urljoin(url.url_name, parsed_url.path))
                    else:
                        continue
                except CrawlerExceptions.IndexPageException as indexpageerror:
                    if self.log:
                        logging.info(indexpageerror)
                    # No URL was produced for this link; without skipping,
                    # url_to_crawl would be unbound or left over from the
                    # previous link.
                    continue
                # Check and insert under a single lock acquisition so two
                # workers cannot both claim the same URL.
                with self.url_lock:
                    is_new_url = url_to_crawl not in self.all_urls
                    if is_new_url:
                        self.all_urls.add(url_to_crawl)
                if not is_new_url:
                    continue
                urls.add(url_to_crawl)
                url_temp = self.url_tuple(url.position + 1, url_to_crawl)
                self.urls_queue.put(url_temp)
            # check that the set of urls is not empty
            if urls:
                self.res_url_queue.put((self.number, url.url_name, url.position, urls))
    except urllib.error.URLError as urlerror:
        if self.log:
            logging.error(urlerror)
    except ValueError as valerr:
        if self.log:
            logging.error(valerr)
def test_url():
    """The request built by ``make_request`` exposes the bare ``http`` root URL."""
    request = make_request()
    expected = urlunparse(('http', '', '/', None, '', None))
    assert request.url == expected
def _reset_path(self, path): parts = list(self.uri_parts) parts[2] = path url = urlparse.urlunparse(parts) return url
def _get_aad_token(cli_ctx,
                   login_server,
                   only_refresh_token,
                   repository=None,
                   artifact_repository=None,
                   permission=None):
    """Obtains refresh and access tokens for an AAD-enabled registry.

    The registry's ``/v2/`` endpoint is probed for a ``Bearer`` challenge;
    the challenge's ``realm`` gives the host of the OAuth2 endpoints, where
    the current profile's AAD access token is exchanged for a refresh token
    and, unless ``only_refresh_token`` is set, for a scoped access token.

    :param cli_ctx: CLI context used to load the current profile
    :param str login_server: The registry login server URL to log in to
    :param bool only_refresh_token: Whether to ask for only refresh token, or for both refresh and access tokens
    :param str repository: Repository for which the access token is requested
    :param str artifact_repository: Artifact repository for which the access token is requested
    :param str permission: The requested permission on the repository, '*' or 'pull'
    :returns: the refresh token when ``only_refresh_token`` is true, otherwise the access token
    :raises ValueError: on conflicting or missing repository/permission arguments
    :raises CLIError: when the registry issues no usable challenge or denies access
    """
    if repository and artifact_repository:
        raise ValueError("Only one of repository and artifact_repository can be provided.")

    if (repository or artifact_repository) and permission not in ACCESS_TOKEN_PERMISSION:
        raise ValueError(
            "Permission is required for a repository or artifact_repository. Allowed access token permission: {}"
            .format(ACCESS_TOKEN_PERMISSION))

    login_server = login_server.rstrip('/')

    challenge = requests.get('https://' + login_server + '/v2/', verify=(not should_disable_connection_verify()))
    if challenge.status_code not in [401] or 'WWW-Authenticate' not in challenge.headers:
        raise CLIError("Registry '{}' did not issue a challenge.".format(login_server))

    authenticate = challenge.headers['WWW-Authenticate']

    # Split the auth scheme from its parameter list exactly once, so that a
    # header written as ``Bearer realm="...", service="..."`` (space after
    # the comma) keeps all of its parameters.
    tokens = authenticate.split(' ', 1)
    if len(tokens) < 2 or tokens[0].lower() != 'bearer':
        raise CLIError("Registry '{}' does not support AAD login.".format(login_server))

    # Split each ``key=value`` on the first '=' only, so values that
    # themselves contain '=' (e.g. a realm URL with a query string) are not
    # truncated. Fragments without '=' are ignored.
    params = {}
    for item in tokens[1].split(','):
        key, separator, value = item.strip().partition('=')
        if separator:
            params[key] = value.strip('"')

    if 'realm' not in params or 'service' not in params:
        raise CLIError("Registry '{}' does not support AAD login.".format(login_server))

    authurl = urlparse(params['realm'])
    authhost = urlunparse((authurl[0], authurl[1], '/oauth2/exchange', '', '', ''))

    from azure.cli.core._profile import Profile
    profile = Profile(cli_ctx=cli_ctx)
    creds, _, tenant = profile.get_raw_token()

    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    content = {
        'grant_type': 'access_token',
        'service': params['service'],
        'tenant': tenant,
        'access_token': creds[1]
    }

    response = requests.post(authhost, urlencode(content), headers=headers,
                             verify=(not should_disable_connection_verify()))

    if response.status_code not in [200]:
        raise CLIError(
            "Access to registry '{}' was denied. Response code: {}.".format(
                login_server, response.status_code))

    refresh_token = loads(response.content.decode("utf-8"))["refresh_token"]
    if only_refresh_token:
        return refresh_token

    authhost = urlunparse((authurl[0], authurl[1], '/oauth2/token', '', '', ''))

    if repository:
        scope = 'repository:{}:{}'.format(repository, permission)
    elif artifact_repository:
        scope = 'artifact-repository:{}:{}'.format(artifact_repository, permission)
    else:
        # catalog only has * as permission, even for a read operation
        scope = 'registry:catalog:*'

    content = {
        'grant_type': 'refresh_token',
        'service': login_server,
        'scope': scope,
        'refresh_token': refresh_token
    }
    response = requests.post(authhost, urlencode(content), headers=headers,
                             verify=(not should_disable_connection_verify()))

    if response.status_code not in [200]:
        raise CLIError(
            "Access to registry '{}' was denied. Response code: {}.".format(
                login_server, response.status_code))

    return loads(response.content.decode("utf-8"))["access_token"]
# the rest. # # 1. The scheme must be one of HTTP / HTTPS (and have no globs). # 2. The domain can have globs, but we limit it to characters that can # reasonably be a domain part. # TODO: This does not attempt to handle Unicode domain names. # 3. Other parts allow a glob to be any one, or more, characters. results = urlparse.urlparse(glob) # Ensure the scheme does not have wildcards (and is a sane scheme). if results.scheme not in {"http", "https"}: raise ValueError("Insecure oEmbed glob scheme: %s" % (results.scheme, )) pattern = urlparse.urlunparse([ results.scheme, re.escape(results.netloc).replace("\\*", "[a-zA-Z0-9_-]+"), ] + [re.escape(part).replace("\\*", ".+") for part in results[2:]]) _oembed_patterns[re.compile(pattern)] = endpoint @attr.s(slots=True) class OEmbedResult: # Either HTML content or URL must be provided. html = attr.ib(type=Optional[str]) url = attr.ib(type=Optional[str]) title = attr.ib(type=Optional[str]) # Number of seconds to cache the content. cache_age = attr.ib(type=int) class OEmbedError(Exception):
def get_pagination_context(page,
                           pages_to_show=11,
                           url=None,
                           size=None,
                           justify_content=None,
                           extra=None,
                           parameter_name="page"):
    """Generate Bootstrap pagination context from a page object.

    Computes the window of page numbers to display around ``page.number``,
    the optional "jump back" / "jump forward" targets, the base URL (with
    any ``extra`` query-string parameters merged in) and the CSS classes
    for the pagination element.  Raises ``ValueError`` when
    ``pages_to_show`` is smaller than 1.
    """
    pages_to_show = int(pages_to_show)
    if pages_to_show < 1:
        raise ValueError(
            "Pagination pages_to_show should be a positive integer, you specified {pages_to_show}."
            .format(pages_to_show=pages_to_show))

    num_pages = page.paginator.num_pages
    current_page = page.number
    half_page_num = max(int(floor(pages_to_show / 2)), 0)

    # Left edge of the window, and the jump-back target when the window
    # does not start at page 1.
    first_page = max(current_page - half_page_num, 1)
    pages_back = max(first_page - half_page_num, 1) if first_page > 1 else None

    # Right edge of the window; one extra page is shown when there is no
    # jump-back link.
    last_page = first_page + pages_to_show - 1
    if pages_back is None:
        last_page += 1
    last_page = min(last_page, num_pages)

    if last_page < num_pages:
        pages_forward = min(last_page + half_page_num, num_pages)
    else:
        # The window touches the last page: no jump-forward link, and the
        # window (and jump-back target) shift one page to the left.
        pages_forward = None
        if first_page > 1:
            first_page -= 1
        if pages_back is not None and pages_back > 1:
            pages_back -= 1
        else:
            pages_back = None

    pages_shown = list(range(first_page, last_page + 1))

    # Rebuild the url with the extra querystring parameters merged in.
    parts = urlparse(url or "")
    params = parse_qs(parts.query)
    if extra:
        params.update(parse_qs(extra))
    url = urlunparse([
        parts.scheme,
        parts.netloc,
        parts.path,
        parts.params,
        urlencode(params, doseq=True),
        parts.fragment,
    ])

    # Set CSS classes, see http://getbootstrap.com/components/#pagination
    pagination_css_classes = ["pagination"]
    if size == "small":
        pagination_css_classes.append("pagination-sm")
    elif size == "large":
        pagination_css_classes.append("pagination-lg")

    if justify_content == "start":
        pagination_css_classes.append("justify-content-start")
    elif justify_content == "center":
        pagination_css_classes.append("justify-content-center")
    elif justify_content == "end":
        pagination_css_classes.append("justify-content-end")

    context = {
        "bootstrap_pagination_url": url,
        "num_pages": num_pages,
        "current_page": current_page,
        "first_page": first_page,
        "last_page": last_page,
        "pages_shown": pages_shown,
        "pages_back": pages_back,
        "pages_forward": pages_forward,
        "pagination_css_classes": " ".join(pagination_css_classes),
        "parameter_name": parameter_name,
    }
    return context
def url(self):
    """Return the URL assembled from scheme, host, path and query string.

    The params and fragment components are always left empty.
    """
    components = (
        self.scheme,
        self.host,
        self.path,
        None,
        self.query_string,
        None,
    )
    return urlunparse(components)
def url_for(self, view_name: str, **kwargs):
    """Build a URL based on a view name and the values provided.

    In order to build a URL, all request parameters must be supplied as
    keyword arguments, and each parameter must pass the test for the
    specified parameter type. If these conditions are not met, a
    `URLBuildError` will be thrown.

    Keyword arguments that are not request parameters will be included in
    the output URL's query string.

    There are several _special_ keyword arguments that will alter how the
    URL will be returned:

    1. **_anchor**: ``str`` - Adds an ``#anchor`` to the end
    2. **_scheme**: ``str`` - Should be either ``"http"`` or ``"https"``,
       default is ``"http"``
    3. **_external**: ``bool`` - Whether to return the path or a full URL
       with scheme and host
    4. **_host**: ``str`` - Used when one or more hosts are defined for a
       route to tell Sanic which to use
       (only applies with ``_external=True``)
    5. **_server**: ``str`` - If not using ``_host``, this will be used
       for defining the hostname of the URL
       (only applies with ``_external=True``),
       defaults to ``app.config.SERVER_NAME``

    If you want the PORT to appear in your URL, you should set it in:

    .. code-block::

        app.config.SERVER_NAME = "myserver:7777"

    `See user guide re: routing
    <https://sanicframework.org/guide/basics/routing.html#generating-a-url>`__

    :param view_name: string referencing the view name
    :param kwargs: keys and values that are used to build request
        parameters and query string arguments.

    :return: the built URL

    Raises:
        URLBuildError: the view is unknown, a required parameter is
            missing, or a supplied value does not match its pattern
        ValueError: the host is ambiguous or not available for the route,
            or ``_scheme`` is given without ``_external``
    """
    # find the route by the supplied view name
    kw: Dict[str, str] = {}
    # special static files url_for

    if "." not in view_name:
        view_name = f"{self.name}.{view_name}"

    if view_name.endswith(".static"):
        name = kwargs.pop("name", None)
        if name:
            view_name = view_name.replace("static", name)
        kw.update(name=view_name)

    route = self.router.find_route_by_view_name(view_name, **kw)
    if not route:
        raise URLBuildError(
            f"Endpoint with name `{view_name}` was not found"
        )

    uri = route.path

    if getattr(route.ctx, "static", None):
        filename = kwargs.pop("filename", "")
        # it's static folder
        if "__file_uri__" in uri:
            if filename.startswith("/"):
                filename = filename[1:]

            kwargs["__file_uri__"] = filename

    if (
        uri != "/"
        and uri.endswith("/")
        and not route.strict
        and not route.raw_path[:-1]
    ):
        uri = uri[:-1]

    if not uri.startswith("/"):
        uri = f"/{uri}"

    out = uri

    # _method is only a placeholder now, don't know how to support it
    kwargs.pop("_method", None)
    anchor = kwargs.pop("_anchor", "")
    # _external need SERVER_NAME in config or pass _server arg
    host = kwargs.pop("_host", None)
    external = kwargs.pop("_external", False) or bool(host)
    scheme = kwargs.pop("_scheme", "")
    if route.ctx.hosts and external:
        if not host and len(route.ctx.hosts) > 1:
            raise ValueError(
                f"Host is ambiguous: {', '.join(route.ctx.hosts)}"
            )
        elif host and host not in route.ctx.hosts:
            raise ValueError(
                f"Requested host ({host}) is not available for this "
                f"route: {route.ctx.hosts}"
            )
        elif not host:
            host = list(route.ctx.hosts)[0]

    if scheme and not external:
        raise ValueError("When specifying _scheme, _external must be True")

    netloc = kwargs.pop("_server", None)
    if netloc is None and external:
        netloc = host or self.config.get("SERVER_NAME", "")

    if external:
        # Only a leading "scheme://" carries a scheme. Looking for a bare
        # ":" would mistake the port separator of a short host such as
        # "web:8000" for one.
        has_scheme_prefix = "://" in netloc[:8]
        if not scheme:
            if has_scheme_prefix:
                scheme = netloc[:8].split("://", 1)[0]
            else:
                scheme = "http"

        if has_scheme_prefix:
            netloc = netloc.split("://", 1)[-1]

    # find all the parameters we will need to build in the URL
    route.finalize()
    for param_info in route.params.values():
        # we only want to match against each individual parameter
        try:
            supplied_param = str(kwargs.pop(param_info.name))
        except KeyError:
            raise URLBuildError(
                f"Required parameter `{param_info.name}` was not "
                "passed to url_for"
            )

        # determine if the parameter supplied by the caller
        # passes the test in the URL
        if param_info.pattern:
            if not param_info.pattern.match(supplied_param):
                if param_info.cast != str:
                    msg = (
                        f'Value "{supplied_param}" '
                        f"for parameter `{param_info.name}` does "
                        "not match pattern for type "
                        f"`{param_info.cast.__name__}`: "
                        f"{param_info.pattern.pattern}"
                    )
                else:
                    msg = (
                        f'Value "{supplied_param}" for parameter '
                        f"`{param_info.name}` does not satisfy "
                        f"pattern {param_info.pattern.pattern}"
                    )
                raise URLBuildError(msg)

        # replace the parameter in the URL with the supplied value.
        # A callable replacement inserts the value verbatim; passing the
        # string directly would make re.sub interpret backslashes in it as
        # escapes / group references.
        replacement_regex = f"(<{param_info.name}.*?>)"
        out = re.sub(
            replacement_regex,
            lambda _match, value=supplied_param: value,
            out,
        )

    # parse the remainder of the keyword arguments into a querystring
    query_string = urlencode(kwargs, doseq=True) if kwargs else ""
    # scheme://netloc/path;parameters?query#fragment
    out = urlunparse((scheme, netloc, out, "", query_string, anchor))

    return out
def __init__(self, remote_server_addr):
    """Store the remote server URL and build the command routing table.

    The hostname in ``remote_server_addr`` is resolved to an IP address
    with ``socket.gethostbyname`` and the URL is rebuilt around that
    address, keeping any port and user credentials. When resolution fails
    the address is kept exactly as given and the failure is logged at
    INFO level.

    :param remote_server_addr: URL of the remote server.
    """
    # Attempt to resolve the hostname and get an IP address.
    parsed_url = parse.urlparse(remote_server_addr)
    if parsed_url.hostname:
        try:
            # Only the lookup can raise gaierror, so it alone is guarded.
            netloc = socket.gethostbyname(parsed_url.hostname)
        except socket.gaierror:
            LOGGER.info('Could not get IP address for host: %s',
                        parsed_url.hostname)
        else:
            if parsed_url.port:
                netloc += ':%d' % parsed_url.port
            if parsed_url.username:
                auth = parsed_url.username
                if parsed_url.password:
                    auth += ':%s' % parsed_url.password
                netloc = '%s@%s' % (auth, netloc)
            remote_server_addr = parse.urlunparse(
                (parsed_url.scheme, netloc, parsed_url.path,
                 parsed_url.params, parsed_url.query, parsed_url.fragment))

    self._url = remote_server_addr
    # Maps each Command to an (HTTP method, path template) pair.
    # NOTE(review): the ``$name`` placeholders are presumably substituted
    # by whatever code executes a command -- confirm against the caller.
    self._commands = {
        Command.STATUS: ('GET', '/status'),
        Command.NEW_SESSION: ('POST', '/session'),
        Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
        Command.QUIT: ('DELETE', '/session/$sessionId'),
        Command.GET_CURRENT_WINDOW_HANDLE:
            ('GET', '/session/$sessionId/window_handle'),
        Command.GET_WINDOW_HANDLES:
            ('GET', '/session/$sessionId/window_handles'),
        Command.GET: ('POST', '/session/$sessionId/url'),
        Command.GO_FORWARD: ('POST', '/session/$sessionId/forward'),
        Command.GO_BACK: ('POST', '/session/$sessionId/back'),
        Command.REFRESH: ('POST', '/session/$sessionId/refresh'),
        Command.EXECUTE_SCRIPT: ('POST', '/session/$sessionId/execute'),
        Command.GET_CURRENT_URL: ('GET', '/session/$sessionId/url'),
        Command.GET_TITLE: ('GET', '/session/$sessionId/title'),
        Command.GET_PAGE_SOURCE: ('GET', '/session/$sessionId/source'),
        Command.SCREENSHOT: ('GET', '/session/$sessionId/screenshot'),
        Command.SET_BROWSER_VISIBLE:
            ('POST', '/session/$sessionId/visible'),
        Command.IS_BROWSER_VISIBLE: ('GET', '/session/$sessionId/visible'),
        Command.FIND_ELEMENT: ('POST', '/session/$sessionId/element'),
        Command.FIND_ELEMENTS: ('POST', '/session/$sessionId/elements'),
        Command.GET_ACTIVE_ELEMENT:
            ('POST', '/session/$sessionId/element/active'),
        Command.FIND_CHILD_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/element'),
        Command.FIND_CHILD_ELEMENTS:
            ('POST', '/session/$sessionId/element/$id/elements'),
        Command.CLICK_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/click'),
        Command.CLEAR_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/clear'),
        Command.SUBMIT_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/submit'),
        Command.GET_ELEMENT_TEXT:
            ('GET', '/session/$sessionId/element/$id/text'),
        Command.SEND_KEYS_TO_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/value'),
        Command.SEND_KEYS_TO_ACTIVE_ELEMENT:
            ('POST', '/session/$sessionId/keys'),
        Command.UPLOAD_FILE: ('POST', "/session/$sessionId/file"),
        Command.GET_ELEMENT_VALUE:
            ('GET', '/session/$sessionId/element/$id/value'),
        Command.GET_ELEMENT_TAG_NAME:
            ('GET', '/session/$sessionId/element/$id/name'),
        Command.IS_ELEMENT_SELECTED:
            ('GET', '/session/$sessionId/element/$id/selected'),
        Command.SET_ELEMENT_SELECTED:
            ('POST', '/session/$sessionId/element/$id/selected'),
        Command.TOGGLE_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/toggle'),
        Command.IS_ELEMENT_ENABLED:
            ('GET', '/session/$sessionId/element/$id/enabled'),
        Command.IS_ELEMENT_DISPLAYED:
            ('GET', '/session/$sessionId/element/$id/displayed'),
        Command.HOVER_OVER_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/hover'),
        Command.GET_ELEMENT_LOCATION:
            ('GET', '/session/$sessionId/element/$id/location'),
        Command.GET_ELEMENT_LOCATION_ONCE_SCROLLED_INTO_VIEW:
            ('GET', '/session/$sessionId/element/$id/location_in_view'),
        Command.GET_ELEMENT_SIZE:
            ('GET', '/session/$sessionId/element/$id/size'),
        Command.GET_ELEMENT_ATTRIBUTE:
            ('GET', '/session/$sessionId/element/$id/attribute/$name'),
        Command.ELEMENT_EQUALS:
            ('GET', '/session/$sessionId/element/$id/equals/$other'),
        Command.GET_ALL_COOKIES: ('GET', '/session/$sessionId/cookie'),
        Command.ADD_COOKIE: ('POST', '/session/$sessionId/cookie'),
        Command.DELETE_ALL_COOKIES:
            ('DELETE', '/session/$sessionId/cookie'),
        Command.DELETE_COOKIE:
            ('DELETE', '/session/$sessionId/cookie/$name'),
        Command.SWITCH_TO_FRAME: ('POST', '/session/$sessionId/frame'),
        Command.SWITCH_TO_WINDOW: ('POST', '/session/$sessionId/window'),
        Command.CLOSE: ('DELETE', '/session/$sessionId/window'),
        Command.DRAG_ELEMENT:
            ('POST', '/session/$sessionId/element/$id/drag'),
        Command.GET_SPEED: ('GET', '/session/$sessionId/speed'),
        Command.SET_SPEED: ('POST', '/session/$sessionId/speed'),
        Command.GET_ELEMENT_VALUE_OF_CSS_PROPERTY:
            ('GET', '/session/$sessionId/element/$id/css/$propertyName'),
        Command.IMPLICIT_WAIT:
            ('POST', '/session/$sessionId/timeouts/implicit_wait'),
        Command.EXECUTE_ASYNC_SCRIPT:
            ('POST', '/session/$sessionId/execute_async'),
        Command.SET_SCRIPT_TIMEOUT:
            ('POST', '/session/$sessionId/timeouts/async_script'),
        Command.SET_TIMEOUTS: ('POST', '/session/$sessionId/timeouts'),
        Command.DISMISS_ALERT:
            ('POST', '/session/$sessionId/dismiss_alert'),
        Command.ACCEPT_ALERT: ('POST', '/session/$sessionId/accept_alert'),
        Command.SET_ALERT_VALUE:
            ('POST', '/session/$sessionId/alert_text'),
        Command.GET_ALERT_TEXT: ('GET', '/session/$sessionId/alert_text'),
        Command.CLICK: ('POST', '/session/$sessionId/click'),
        Command.DOUBLE_CLICK: ('POST', '/session/$sessionId/doubleclick'),
        Command.MOUSE_DOWN: ('POST', '/session/$sessionId/buttondown'),
        Command.MOUSE_UP: ('POST', '/session/$sessionId/buttonup'),
        Command.MOVE_TO: ('POST', '/session/$sessionId/moveto'),
        Command.GET_WINDOW_SIZE:
            ('GET', '/session/$sessionId/window/$windowHandle/size'),
        Command.SET_WINDOW_SIZE:
            ('POST', '/session/$sessionId/window/$windowHandle/size'),
        Command.GET_WINDOW_POSITION:
            ('GET', '/session/$sessionId/window/$windowHandle/position'),
        Command.SET_WINDOW_POSITION:
            ('POST', '/session/$sessionId/window/$windowHandle/position'),
        Command.MAXIMIZE_WINDOW:
            ('POST', '/session/$sessionId/window/$windowHandle/maximize'),
        Command.SET_SCREEN_ORIENTATION:
            ('POST', '/session/$sessionId/orientation'),
        Command.GET_SCREEN_ORIENTATION:
            ('GET', '/session/$sessionId/orientation'),
        Command.SINGLE_TAP: ('POST', '/session/$sessionId/touch/click'),
        Command.TOUCH_DOWN: ('POST', '/session/$sessionId/touch/down'),
        Command.TOUCH_UP: ('POST', '/session/$sessionId/touch/up'),
        Command.TOUCH_MOVE: ('POST', '/session/$sessionId/touch/move'),
        Command.TOUCH_SCROLL: ('POST', '/session/$sessionId/touch/scroll'),
        Command.DOUBLE_TAP:
            ('POST', '/session/$sessionId/touch/doubleclick'),
        Command.LONG_PRESS:
            ('POST', '/session/$sessionId/touch/longclick'),
        Command.FLICK: ('POST', '/session/$sessionId/touch/flick'),
        Command.EXECUTE_SQL: ('POST', '/session/$sessionId/execute_sql'),
        Command.GET_LOCATION: ('GET', '/session/$sessionId/location'),
        Command.SET_LOCATION: ('POST', '/session/$sessionId/location'),
        Command.GET_APP_CACHE:
            ('GET', '/session/$sessionId/application_cache'),
        Command.GET_APP_CACHE_STATUS:
            ('GET', '/session/$sessionId/application_cache/status'),
        Command.CLEAR_APP_CACHE:
            ('DELETE', '/session/$sessionId/application_cache/clear'),
        Command.IS_BROWSER_ONLINE:
            ('GET', '/session/$sessionId/browser_connection'),
        Command.SET_BROWSER_ONLINE:
            ('POST', '/session/$sessionId/browser_connection'),
        Command.GET_LOCAL_STORAGE_ITEM:
            ('GET', '/session/$sessionId/local_storage/key/$key'),
        # Removal is a DELETE on the key resource, matching
        # REMOVE_SESSION_STORAGE_ITEM below (this entry used to be POST).
        Command.REMOVE_LOCAL_STORAGE_ITEM:
            ('DELETE', '/session/$sessionId/local_storage/key/$key'),
        Command.GET_LOCAL_STORAGE_KEYS:
            ('GET', '/session/$sessionId/local_storage'),
        Command.SET_LOCAL_STORAGE_ITEM:
            ('POST', '/session/$sessionId/local_storage'),
        Command.CLEAR_LOCAL_STORAGE:
            ('DELETE', '/session/$sessionId/local_storage'),
        Command.GET_LOCAL_STORAGE_SIZE:
            ('GET', '/session/$sessionId/local_storage/size'),
        Command.GET_SESSION_STORAGE_ITEM:
            ('GET', '/session/$sessionId/session_storage/key/$key'),
        Command.REMOVE_SESSION_STORAGE_ITEM:
            ('DELETE', '/session/$sessionId/session_storage/key/$key'),
        Command.GET_SESSION_STORAGE_KEYS:
            ('GET', '/session/$sessionId/session_storage'),
        Command.SET_SESSION_STORAGE_ITEM:
            ('POST', '/session/$sessionId/session_storage'),
        Command.CLEAR_SESSION_STORAGE:
            ('DELETE', '/session/$sessionId/session_storage'),
        Command.GET_SESSION_STORAGE_SIZE:
            ('GET', '/session/$sessionId/session_storage/size'),
        Command.GET_LOG: ('POST', '/session/$sessionId/log'),
        Command.GET_AVAILABLE_LOG_TYPES:
            ('GET', '/session/$sessionId/log/types'),
    }
def reparse(self, parsed, default_scheme: str = ""):
    """Rebuild a parsed URL so scheme, host and port sit in the right fields.

    Inputs such as ``host:443/`` or a bare ``host`` are split by
    ``urlparse`` with the host in the scheme or path slot. This method
    moves the pieces back, infers a scheme from ports 443/80 when none is
    known, adds the default port for http/https when none was given, and
    parses the reassembled URL again.

    Args:
        parsed (:obj:`urllib.parse.ParseResult`): parsed URL to reparse
        default_scheme: default scheme to use if URL does not contain a scheme

    Returns:
        :obj:`urllib.parse.ParseResult`
    """
    port_to_scheme = {"443": "https", "80": "http"}
    scheme_to_port = {"https": "443", "http": "80"}

    scheme, netloc, path, params, query, fragment = parsed
    hostname = parsed.hostname
    port_text = format(parsed.port or "")

    # Case: urlparse('host:443/') ->
    #   scheme='host', netloc='', path='443/'
    # The "scheme" is really the host and the leading path segment the port.
    if scheme and path and not netloc:
        first_segment = path.split("/")[0]
        if first_segment.isdigit():
            hostname, port_text = scheme, first_segment
            scheme, path = default_scheme, ""
            netloc = f"{hostname}:{port_text}"

    # Cases: urlparse('host:443') -> path='host:443'
    #        urlparse('host')     -> path='host'
    # Everything landed in the path, so it becomes the network location.
    if path and not netloc:
        netloc, path = path, netloc
        name_part, colon, port_part = netloc.partition(":")
        if colon:  # pragma: no cover
            # can't get this to trigger anymore, ignore test coverage
            hostname, port_text = name_part, port_part
            netloc = f"{hostname}:{port_text}" if port_text else hostname
        else:
            hostname = netloc

    scheme = scheme or default_scheme

    # No scheme anywhere: fall back to the one implied by a well-known port.
    if not scheme and port_text:
        scheme = port_to_scheme.get(port_text, scheme)

    # No port anywhere: attach the default port of a well-known scheme.
    if not port_text:
        implied_port = scheme_to_port.get(scheme)
        if implied_port:
            netloc = self.make_netloc(hostname, implied_port)

    rebuilt = urlunparse((scheme, netloc, path, params, query, fragment))
    return urlparse(rebuilt)