def validate_ticket(self, ticket, request):
    service_name = self.service_name
    ticket_name = self.ticket_name
    this_url = self.get_url(request)
    p = urlparse.urlparse(this_url)
    qs_map = urlparse.parse_qs(p.query)
    if ticket_name in qs_map:
        del qs_map[ticket_name]
    param_str = urlencode(qs_map, doseq=True)
    p = urlparse.ParseResult(*tuple(p[:4] + (param_str, ) + p[5:]))
    service_url = urlparse.urlunparse(p)
    params = {
        service_name: service_url,
        ticket_name: ticket,
    }
    param_str = urlencode(params, doseq=True)
    p = urlparse.urlparse(self.cas_info['service_validate_url'])
    p = urlparse.ParseResult(*tuple(p[:4] + (param_str, ) + p[5:]))
    service_validate_url = urlparse.urlunparse(p)
    self.log("Requesting service-validate URL => '{0}' ...".format(
        service_validate_url))
    http_client = HTTPClient(self.cas_agent)
    d = http_client.get(service_validate_url)
    d.addCallback(treq.content)
    d.addCallback(self.parse_sv_results, service_url, ticket, request)
    return d

def setUp(self):
    super(TestClientHTTPBasicAuth, self).setUp()
    conf = ceilometer_service.prepare_service(argv=[], config_files=[])
    self.CONF = self.useFixture(config_fixture.Config(conf)).conf
    self.parsed_url = urlparse.urlparse(
        'http://127.0.0.1:8080/controller/nb/v2?container_name=default&'
        'container_name=egg&auth=%s&user=admin&password=admin_pass&'
        'scheme=%s' % (self.auth_way, self.scheme))
    self.params = urlparse.parse_qs(self.parsed_url.query)
    self.endpoint = urlparse.urlunparse(
        urlparse.ParseResult(self.scheme,
                             self.parsed_url.netloc,
                             self.parsed_url.path,
                             None, None, None))
    odl_params = {
        'auth': self.params.get('auth')[0],
        'user': self.params.get('user')[0],
        'password': self.params.get('password')[0]
    }
    self.client = client.Client(self.CONF, self.endpoint, odl_params)

    self.resp = mock.MagicMock()
    self.get = mock.patch('requests.get',
                          return_value=self.resp).start()

    self.resp.raw.version = 1.1
    self.resp.status_code = 200
    self.resp.reason = 'OK'
    self.resp.headers = {}
    self.resp.content = 'dummy'

def wbsearchentities(
        search,
        language='en',
        strictlanguage=False,
        type_='item',
        limit=7,
        continue_=0,
        props=[],
):
    """Wrap Wikibase API wbsearchentities module:
    https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities
    """
    query = {
        'action': 'wbsearchentities',
        'search': search,
        'language': language,
        'type': type_,
        'limit': limit,
        'continue': continue_,
        'format': 'json'
    }
    if strictlanguage:
        query['strictlanguage'] = True
    if any(props):
        query['props'] = sep_join(props)
    url = parse.urlunparse(
        parse.ParseResult(scheme='https',
                          netloc='www.wikidata.org',
                          path='/w/api.php',
                          params='',
                          query=parse.urlencode(query),
                          fragment=''))
    response = requests.get(url)
    print(f'[Status {response.status_code}]: {parse.unquote(response.url)}',
          file=sys.stderr)
    if response.status_code == 200:
        return response.json()

def get_sample_data(self, meter_name, parse_url, params, cache):
    extractor = self._get_extractor(meter_name)
    if extractor is None:
        # This driver does not implement a way to get this meter, or the
        # OpenDaylight REST API has no endpoint for it.
        return None
    iter = self._get_iter(meter_name)
    if iter is None:
        # This driver does not implement a way to get this meter, or the
        # OpenDaylight REST API has no endpoint for it.
        return None

    parts = urlparse.ParseResult(params.get('scheme', ['http'])[0],
                                 parse_url.netloc,
                                 parse_url.path,
                                 None, None, None)
    endpoint = urlparse.urlunparse(parts)

    data = self._prepare_cache(endpoint, params, cache)

    samples = []
    for name, value in six.iteritems(data):
        for sample in iter(extractor, value):
            if sample is not None:
                # Record the controller name and container name in
                # resource_metadata.
                sample[2]['controller'] = 'OpenDaylight'
                sample[2]['container'] = name
                samples.append(sample + (None, ))

    return samples

def validate_indico_url(url):
    """Construct the indico URL needed to get required timetable contents.

    e.g. https://indico.cern.ch/event/662485/timetable/?view=standard

    Parameters
    ----------
    url : str
        Base URL to use

    Returns
    -------
    str
        Correctly formatted URL
    """
    url_info = urlparse.urlparse(url)
    path_parts = url_info.path.split("/")
    start_ind = path_parts.index("event")
    parts = path_parts[start_ind:start_ind + 2]
    parts.append("timetable")
    new_path = "/".join([""] + parts + [""])
    new_url_info = urlparse.ParseResult(scheme=url_info.scheme,
                                        netloc=url_info.netloc,
                                        path=new_path,
                                        params='',
                                        query='view=standard',
                                        fragment='')
    return new_url_info.geturl()

def enforce_domain_https():
    url = parse.urlparse(request.url)
    is_http = 'prematch.org' in url.netloc.lower() and url.scheme == 'http'
    if 'appspot' in url.netloc or is_http:
        newurl = parse.ParseResult('https', 'prematch.org', url.path,
                                   url.params, url.query, url.fragment)
        return redirect(newurl.geturl(), code=301)

def detect_urls_in_document(document: str, url: str) -> List[str]:
    url_parts = parse.urlparse(url)

    soup = BeautifulSoup(document, "html.parser")
    links = list(soup.select("a"))

    # Process the <a> links found in the document.
    # For relative references, fall back to the current URL's scheme and netloc.
    urls = []
    for item in links:
        href = item.get("href", None)
        if not href:
            continue

        parts = parse.urlparse("{}".format(href))
        if not parts.path:
            continue
        # Skip non-HTTP schemes (mailto:, javascript:, ...); an empty scheme
        # means a relative link, which is resolved against the current URL below.
        if parts.scheme and parts.scheme not in ['http', 'https']:
            continue

        updated_parts = parse.ParseResult(
            scheme=parts.scheme if parts.scheme else url_parts.scheme,
            netloc=parts.netloc if parts.netloc else url_parts.netloc,
            path=parts.path,
            query=parts.query,
            params=parts.params,
            fragment=parts.fragment)
        urls.append(parse.urlunparse(updated_parts))

    return urls

def test_novnc_bad_token(self):
    """Test accessing novnc console with bad token

    Do the WebSockify HTTP Request to the novnc proxy with a bad token;
    the novnc proxy should reject the connection and close it.
    """
    if self.use_get_remote_console:
        body = self.client.get_remote_console(
            self.server['id'], console_type='novnc',
            protocol='vnc')['remote_console']
    else:
        body = self.client.get_vnc_console(self.server['id'],
                                           type='novnc')['console']
    self.assertEqual('novnc', body['type'])
    # Do the WebSockify HTTP Request to novncproxy with a bad token
    parts = urlparse.urlparse(body['url'])
    qparams = urlparse.parse_qs(parts.query)
    if 'path' in qparams:
        qparams['path'] = urlparse.unquote(qparams['path'][0]).replace(
            'token=', 'token=bad')
    elif 'token' in qparams:
        qparams['token'] = 'bad' + qparams['token'][0]
    new_query = urlparse.urlencode(qparams)
    new_parts = urlparse.ParseResult(parts.scheme, parts.netloc, parts.path,
                                     parts.params, new_query, parts.fragment)
    url = urlparse.urlunparse(new_parts)
    self._websocket = compute.create_websocket(url)
    # Make sure the novncproxy rejected the connection and closed it
    data = self._websocket.receive_frame()
    self.assertTrue(
        data is None or not data,
        "The novnc proxy actually sent us some data, but we "
        "expected it to close the connection.")

def imdb_key_words(imdb_url):
    return_tuple_kw_votes = []
    page_url_parts = url_parse.urlparse(imdb_url)
    print(page_url_parts)
    new_url = url_parse.ParseResult(scheme=page_url_parts.scheme,
                                    netloc=page_url_parts.netloc,
                                    path=page_url_parts.path + "/keywords",
                                    params='',
                                    query='',
                                    fragment='')
    keywords_url = url_parse.urlunparse(new_url)
    print(keywords_url)
    page = requests.get(keywords_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    keyword_list = soup.findAll("td", {"class": "soda sodavote"})
    print(keyword_list)
    for keyword_entry in keyword_list:
        keyword = (keyword_entry.find('div', {
            "class": "sodatext"
        }).find('a').text)
        vote_text = keyword_entry.find('div', {
            'class': 'interesting-count-text'
        }).find('a').text
        m = re.match(" ([0-9]+)", vote_text)
        # print(vote_text)
        if m and int(m.group(0)) > 0:
            return_tuple_kw_votes.append((keyword, int(m.group(0))))
            print(keyword, int(m.group(0)))
        # print(keyword_entry.a.text)
    # print(len(movie_desc))
    return return_tuple_kw_votes

def force_https(parsed):
    return parse.ParseResult(scheme="https",
                             netloc=parsed.netloc,
                             path=parsed.path,
                             params=parsed.params,
                             query=parsed.query,
                             fragment=parsed.fragment)

def download_chapters(chapter_hrefs: List[Chapter], base_directory: str):
    processing_list = []
    for chapter in chapter_hrefs:
        target = os.path.join(base_directory, chapter.directory)
        logger.info(f"Making directory {target}")
        os.makedirs(target, exist_ok=True)
        for index, image in enumerate(chapter.images_hrefs, start=1):
            logger.debug(f"Processing {image}")
            parsed = parse.urlparse(image)
            safe_url = parse.ParseResult(
                scheme=parsed.scheme if parsed.scheme else "http",
                netloc=parsed.netloc,
                path=parsed.path,
                params=parsed.params,
                query=parsed.query,
                fragment=parsed.fragment)
            match = re.search("^.*(?P<extension>\\.[a-zA-Z]+)$",
                              parsed.path.strip())
            extension = match.group("extension") if match else ""
            filepath = os.path.join(base_directory, chapter.directory,
                                    str(index).rjust(3, '0') + extension)
            processing_list.append((filepath, safe_url.geturl(), index))

    with ThreadPoolExecutor(max_workers=image_downloading_workers) as executor:
        futures = []
        for (filepath, url, index) in processing_list:
            futures.append(
                executor.submit(download_image, filepath, url, index))
        for future in futures:
            future.result()

def urlparse(url: str, scheme: str = '',
             allow_fragments: bool = True) -> 'ParseResult':
    """Wrapper function for :func:`urllib.parse.urlparse`.

    Args:
        url: URL to be parsed
        scheme: URL scheme
        allow_fragments: whether to allow fragments

    Returns:
        The parse result.

    Note:
        The function suppresses possible errors when calling
        :func:`urllib.parse.urlparse`. If any occur, it will return
        ``urllib.parse.ParseResult(scheme=scheme, netloc='', path=url,
        params='', query='', fragment='')`` directly.

    """
    with contextlib.suppress(ValueError):
        return urllib_parse.urlparse(url, scheme,
                                     allow_fragments=allow_fragments)
    return urllib_parse.ParseResult(scheme=scheme, netloc='', path=url,
                                    params='', query='', fragment='')

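# Usage sketch for the wrapper above (an assumption-laden illustration, not part
# of the original module): urllib.parse.urlparse raises ValueError for an
# unbalanced IPv6 bracket, so the wrapper falls back to a ParseResult that
# carries the raw input as ``path``.
broken = urlparse('http://[::1')
assert broken.netloc == '' and broken.path == 'http://[::1'
# A well-formed URL is parsed normally.
assert urlparse('https://example.com/a?b=1').netloc == 'example.com'
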
def strip_query(parsed):
    return parse.ParseResult(scheme=parsed.scheme,
                             netloc=parsed.netloc,
                             path=parsed.path,
                             params=parsed.params,
                             query=None,
                             fragment=parsed.fragment)

def trail_request(method, path, parameters: typing.Dict = {},
                  body: typing.Any = None):
    url = parse.ParseResult(scheme='http',
                            netloc='localhost:8080',
                            path=path,
                            query=parse.urlencode(parameters),
                            params='',
                            fragment='')
    req = request.Request(
        parse.urlunparse(url),
        headers={
            'accept': 'application/json',
            'Content-Type': 'application/json'
        },
        method=method,
        data=None if body is None else json.dumps(body).encode())
    with request.urlopen(req) as f:
        if f.status == 204:
            response = []
        else:
            response = json.load(f)
        if f.status >= 400:
            # json.load returns a plain dict, so index the error fields.
            raise RuntimeError(f'{response["message"]}, '
                               f'reasons: {response["reasons"]}')
        return response

def default_pfnunparse(pfnDict):
    """ Create PFN URI from pfnDict

    :param dict pfnDict:
    """
    try:
        if not isinstance(pfnDict, dict):
            return S_ERROR("pfnunparse: wrong type for pfnDict argument, expected a dict, got %s" % type(pfnDict))
        allDict = dict.fromkeys(["Protocol", "Host", "Port", "Path", "FileName", "Options"], "")
        allDict.update(pfnDict)

        scheme = allDict["Protocol"]

        netloc = allDict["Host"]
        if allDict["Port"]:
            netloc += ":%s" % allDict["Port"]

        path = os.path.join(allDict["Path"], allDict["FileName"])
        query = allDict["Options"]

        pr = parse.ParseResult(scheme=scheme, netloc=netloc, path=path,
                               params="", query=query, fragment="")
        pfn = pr.geturl()

        return S_OK(pfn)
    except Exception as e:  # pylint: disable=broad-except
        errStr = "Pfn.default_pfnunparse: Exception while unparsing pfn: %s" % pfnDict
        gLogger.exception(errStr, lException=e)
        return S_ERROR(errStr)

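# Usage sketch (not from the original source; it assumes DIRAC's usual S_OK
# return structure, i.e. a dict carrying the result under the "Value" key, and
# uses a hypothetical host and option string):
res = default_pfnunparse({
    "Protocol": "srm",
    "Host": "srm.example.org",
    "Port": 8443,
    "Path": "/dirac/user",
    "FileName": "file.txt",
    "Options": "SFN=/castor/file.txt",
})
if res["OK"]:
    print(res["Value"])
    # srm://srm.example.org:8443/dirac/user/file.txt?SFN=/castor/file.txt
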
def get_NPR_URL(date_s, program_id, NPR_API_key):
    """
    get the NPR API tag for a specific NPR program.

    :param datetime.date date_s: the date of the program, formatted internally
        as "YYYY-MM-DD" for the query.
    :param int program_id: the NPR program's integer ID.
    :param str NPR_API_key: the NPR API key.
    :returns: a :py:class:`str` of the exploded URL for REST API calls to the
        NPR API server.
    :rtype: str

    .. note:: no methods call this function any more, instead using the
        :py:mod:`requests` module's cleaner, higher-level functionality of
        REST API commands.
    """
    nprApiDate = date_s.strftime('%Y-%m-%d')
    result = urlparse.ParseResult(
        scheme='https', netloc='api.npr.org', path='/query', params='',
        query=urlencode({
            'id': program_id,
            'date': nprApiDate,
            'dateType': 'story',
            'output': 'NPRML',
            'apiKey': NPR_API_key
        }),
        fragment='')
    return result.geturl()

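# Usage sketch with hypothetical inputs (program id 2 and a placeholder API
# key; neither value comes from the NPR documentation). With insertion-ordered
# dicts the query parameters come out in the order given above:
import datetime
url = get_NPR_URL(datetime.date(2020, 1, 31), 2, 'MY_API_KEY')
# -> 'https://api.npr.org/query?id=2&date=2020-01-31&dateType=story&output=NPRML&apiKey=MY_API_KEY'
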
def url_set_params(url: str, new_params: Optional[Dict[str, List[str]]]) -> str:
    """Sets params within a URL to a given mapping

    Arguments:
        url {str} -- URL which is parsed and changed
        new_params {Dict[str, List[str]]} -- new params to set for the URL

    Raises:
        ValueError: No URL specified

    Returns:
        {str} -- new modified URL
    """
    if not url:
        raise ValueError("need url to set params")
    if new_params is None:
        new_params = {}

    parse_result = parse.urlparse(url)
    query_params_encoded = parse.urlencode(new_params, True)
    parse_result = parse.ParseResult(parse_result.scheme,
                                     parse_result.netloc,
                                     parse_result.path,
                                     parse_result.params,
                                     query_params_encoded,  # change query params
                                     parse_result.fragment)
    new_url: str = parse.urlunparse(parse_result)
    return new_url

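# Usage sketch (example.com is illustrative): the existing query string is
# replaced wholesale, and list values expand into repeated parameters because
# urlencode is called with doseq=True.
new = url_set_params('https://example.com/search?q=old',
                     {'q': ['a', 'b'], 'page': ['2']})
assert new == 'https://example.com/search?q=a&q=b&page=2'
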
def subtitle_page_scrape(s_url, meta_data):
    page_url_parts = url_parse.urlparse(s_url)
    page = requests.get(s_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    movie_desc = soup.find("div", {"class": "movie-desc"})
    print(movie_desc.get_text())
    meta_data.PlotSummary = movie_desc.text
    meta_data.ImdbUrl = _get_imdb_url(soup)
    year = soup.find("div", attrs={"id": "circle-score-year",
                                   "class": "circle",
                                   "data-info": "year"})
    meta_data.Year = year.get("data-text")
    subtitles_table = soup.find('tbody')
    for subtitle_entry in subtitles_table.find_all('tr'):
        sub_lang = subtitle_entry.find('span', {"class": "sub-lang"})
        if sub_lang.get_text().lower() == "english":
            download_cell = subtitle_entry.find('td', {"class": "download-cell"})
            download_path = download_cell.a.get('href').split('/')[-1] + ".zip"
            new_url = url_parse.ParseResult(scheme='https',
                                            netloc="yifysubtitles.org",
                                            path="/subtitle/" + download_path,
                                            params='',
                                            query='',
                                            fragment='')
            subtitle_url = url_parse.urlunparse(new_url)
            print(subtitle_url)
            meta_data.SubtitleUrl = subtitle_url
            break
    return

def construct_unique_key(baseurl, params):
    '''constructs a key that is guaranteed to uniquely and repeatably
    identify an API request by its baseurl and params

    AUTOGRADER NOTES: To correctly test this using the autograder, use an
    underscore ("_") to join your baseurl with the params and all the
    key-value pairs from params
    E.g., baseurl_key1_value1

    Parameters
    ----------
    baseurl: string
        The URL for the API endpoint
    params: dict
        A dictionary of param:value pairs

    Returns
    -------
    string
        the unique key as a string
    '''
    u = urlparse.urlparse(baseurl)
    orig_params = dict(urlparse.parse_qsl(u.query))
    orig_params.update(params)
    newquery = urlencode(orig_params)
    new_url = urlparse.ParseResult(u.scheme, u.netloc, u.path, u.params,
                                   newquery, u.fragment).geturl()
    a = hashlib.md5()
    a.update(new_url.encode('utf-8'))
    return a.digest().hex()

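# Usage sketch (api.example.com is a hypothetical endpoint): the key is the
# 32-character hex MD5 of the rebuilt URL, so repeating the same call yields
# the same value, which is what makes it usable as a cache key.
key = construct_unique_key('https://api.example.com/search',
                           {'q': 'cats', 'limit': 5})
same = construct_unique_key('https://api.example.com/search',
                            {'q': 'cats', 'limit': 5})
assert key == same and len(key) == 32
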
def d2url(d):
    "Builds a URL from a dictionary of components, encoding a dict-valued query string"
    if type(d.get("query")) is dict:
        d = d.copy()
        d["query"] = urls.urlencode(d["query"])
    parts = urls.ParseResult(**d)
    return urls.urlunparse(parts)

def _parse_my_resource(resource):
    parse_url = netutils.urlsplit(resource)
    params = urlparse.parse_qs(parse_url.query)
    parts = urlparse.ParseResult(parse_url.scheme,
                                 parse_url.netloc,
                                 parse_url.path,
                                 None, None, None)
    return parts, params

def check_prefix(url):
    p = urlparse.urlparse(url, 'https')
    netloc = p.netloc or p.path
    path = p.path if p.netloc else ''
    p = urlparse.ParseResult('https', netloc, path, *p[3:])
    url = p.geturl()
    return url

def trim_youtu_be(parsed):
    # rewrite to youtube.com internally
    parsed = parse.ParseResult(scheme='https',
                               netloc='youtube.com',
                               path='/watch',
                               params=None,
                               query="v=%s" % parsed.path[1:],
                               fragment=None)
    return trim_youtube_com(parsed)

def __init__(self, tcpdir, pointer):
    http.Message.__init__(self, tcpdir, pointer, dpkt_http.Request)
    # get the query string; it's the part of the URL after the first '?'
    uri = urlparse.urlparse(self.msg.uri)
    self.host = self.msg.headers['host'] if 'host' in self.msg.headers else ''
    fullurl = urlparse.ParseResult('http', self.host, uri.path, uri.params,
                                   uri.query, uri.fragment)
    self.fullurl = fullurl.geturl()
    self.url, frag = urlparse.urldefrag(self.fullurl)
    self.query = urlparse.parse_qs(uri.query, keep_blank_values=True)

def _cronitor_url(key, command):
    return parse.urlunparse(parse.ParseResult(
        scheme='https',
        netloc='cronitor.link',
        path='{}/{}'.format(key, command),
        params='',
        query=parse.urlencode({'host': 'http://localhost:6011'}),
        fragment=''
    ))

def _get_post_url(self) -> str:
    parsed_url = parse.ParseResult(
        scheme=self.protocol,
        netloc=f"{self.server}:{self.port}",
        params="",
        path="/write",
        query=f"db={self.database}&precision=s",
        fragment="",
    )
    return parse.urlunparse(parsed_url)

def storage_url(self) -> str:
    store_url = parse.quote_plus(self._wrapped_storage.storage_url)
    parsed_url = parse.ParseResult(
        scheme="debug",
        path=str(pathlib.Path(self._path).absolute()),
        netloc="",
        params="",
        query=f"storage={store_url}",
        fragment="")
    return parse.urlunparse(parsed_url)

def safeUrl(url: str) -> str:
    from urllib import parse
    pr = parse.urlparse(url)
    pr2 = parse.ParseResult(scheme=pr.scheme,
                            netloc=pr.netloc,
                            path=parse.quote(pr.path, '/%'),
                            params=pr.params,
                            query=pr.query,
                            fragment=pr.fragment)
    return parse.urlunparse(pr2)

def strip_www(parsed):
    if parsed.netloc[:4] != "www.":
        return parsed
    return parse.ParseResult(scheme=parsed.scheme,
                             netloc=parsed.netloc[4:],
                             path=parsed.path,
                             params=parsed.params,
                             query=parsed.query,
                             fragment=parsed.fragment)

def trim_i_imgur(parsed):
    # should end with an image extension
    if parsed.path[-4:-3] != ".":
        return parsed
    return parse.ParseResult(scheme='http',
                             netloc='imgur.com',
                             path=parsed.path[1:-4],
                             params=None,
                             query=None,
                             fragment=None)