def source_link(source):
    label = source
    host = URL(source).host()
    if host == 'commons.wikimedia.org':
        label = 'wikimedia'
    elif host == 'en.wikipedia.org':
        label = 'wikipedia'
    return maybe_external_link(source, label=label)
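# Hedged sketch (example URLs are hypothetical) of the host-based label
# resolution above, using purl's URL.host() getter:
from purl import URL

print(URL('https://commons.wikimedia.org/wiki/File:X.jpg').host())
# -> 'commons.wikimedia.org', so the label becomes 'wikimedia'
print(URL('https://en.wikipedia.org/wiki/Example').host())
# -> 'en.wikipedia.org', so the label becomes 'wikipedia'
# Any other host falls through, leaving the raw source string as the label.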
def maybe_license_link(req, license, **kw):
    cc_link_ = cc_link(req, license, button=kw.pop('button', 'regular'))
    if cc_link_:
        return cc_link_
    license_url = URL(license)
    if license_url.host():
        return external_link(license_url, **kw)
    return license
def perform_action(self):
    self.add_product_to_basket(price=D('6.99'))
    basket = Basket.objects.all()[0]
    basket.freeze()
    url = reverse('paypal-success-response', kwargs={'basket_id': basket.id})
    url = URL().path(url) \
        .query_param('PayerID', '12345') \
        .query_param('token', 'EC-8P797793UC466090M')
    self.response = self.client.get(str(url), follow=True)
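# Hedged sketch (assumption: purl renders host-less URLs as relative paths),
# showing how the chained builder above produces the redirect URL:
from purl import URL

u = URL().path('/checkout/response/').query_param('PayerID', '12345')
print(str(u))  # e.g. '/checkout/response/?PayerID=12345'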
def format_external_link_in_label(url, label=None):
    label = label or URL(url).domain()
    return HTML.span(
        HTML.a(
            HTML.i('', class_="icon-share icon-white"),
            label,
            href=url,
            style="color: white"),
        class_="label label-info")
def append_path(url, url_path):
    target = URL(url_path)
    if url_path.startswith('/'):
        url = url.path(target.path())
    else:
        url = url.add_path_segment(target.path())
    if target.query():
        url = url.query(target.query())
    return url.as_string()
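# Hedged usage sketch for append_path (values are hypothetical): an absolute
# url_path replaces the existing path, while a relative one is appended as a
# new segment; any query string on url_path is carried over.
from purl import URL

base = URL('https://example.com/api')
print(append_path(base, '/v2/items?limit=10'))
# -> e.g. 'https://example.com/v2/items?limit=10'
print(append_path(base, 'items'))
# -> e.g. 'https://example.com/api/items'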
def get_questions(s_link):
    url = URL(s_link)
    if url.domain() not in ['quora.com', 'www.quora.com']:
        return 'error, not quora'
    quora_url = URL(scheme='https', host='www.quora.com',
                    path=url.path(), query='share=1').as_string()
    # requests.Response has no get_text(); use .text for the body
    soup = BeautifulSoup(requests.get(quora_url).text, 'html.parser')
    topic = {}
    topic['url'] = quora_url
    topic['title'] = url.path().split('/')[-1]
    questions = []
    divs = soup.find_all("div", {"class": "pagedlist_item"})
    count = len(divs) - 1
    for i in range(count):
        one_question = {'url': '', 'title': ''}
        try:
            link = divs[i].find("a", {"class": "question_link"})
            one_question['url'] = link['href']
            one_question['title'] = link.find(
                "span", {"class": "link_text"}).text
        except (AttributeError, KeyError, TypeError):
            # Return what was parsed so far, flagging the entry that failed
            return jsonify(topic=topic, questions=questions,
                           parse_failure=one_question)
        one_question['url'] = URL(scheme='https', host='www.quora.com',
                                  path=one_question['url']).as_string()
        if one_question['title'] != "":
            questions.append(one_question)
    print(f'{type(topic)}, {type(questions)}')
    return jsonify(topic=topic, questions=questions)
def do_fixup_recursive(self, obj):
    retval = False
    if 'href' in obj:
        default_port = 443 if self._base_uri.scheme() == 'https' else 80
        _href = URL(obj['href']) \
            .scheme(self._base_uri.scheme()) \
            .host(self._base_uri.host()) \
            .port(self._base_uri.port() or default_port)
        obj['href'] = str(_href)
        retval = True
    for key in obj:
        if isinstance(obj[key], list):
            for item in obj[key]:
                retval |= self.do_fixup_recursive(item)
    return retval
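# Hedged sketch of the per-href rebasing step above (base URI hypothetical;
# assumes purl's scheme()/host()/port() setters each return a new URL):
from purl import URL

base = URL('https://api.example.com')
href = URL('http://old-host/items/1?page=2')
rebased = href.scheme(base.scheme()).host(base.host()).port(base.port() or 443)
print(str(rebased))  # e.g. 'https://api.example.com:443/items/1?page=2'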
def _sendSessionDetails(self, user, sessionTime, tapsCounters, timeout):
    sessionUri = URL(self.apiEndPoint.value).add_path_segment('activity')
    payload = {
        'sessionTime': sessionTime.isoformat(),
        'personnelNumber': user.personnelId,
        'Taps': {tapId: {'amount': tapsCounters[tapId].volume}
                 for tapId in tapsCounters}
    }
    userReq = requests.post(sessionUri.as_string(),
                            auth=HTTPBasicAuth(self._apiUser.value,
                                               self._apiKey.value),
                            json=payload, timeout=timeout)
    userReq.raise_for_status()
    log.info('Session info for user: %d:%s added with activities: %s',
             user.personnelId, user.fullName,
             str([activity['ActivityId'] for activity in userReq.json()]))
    return True
def _isUserAuthenticatedImpl(self, cardId, timeout):
    userAuthUri = URL(self.apiEndPoint.value) \
        .add_path_segment('ispersonvalid') \
        .add_path_segment(str(cardId))
    userReq = requests.get(userAuthUri.as_string(),
                           auth=HTTPBasicAuth(self._apiUser.value,
                                              self._apiKey.value),
                           timeout=timeout)
    userReq.raise_for_status()
    # Renamed from 'json' to avoid shadowing the json module
    data = userReq.json()
    if isinstance(data, list):
        data = data[0]
    validUser = data.get('Valid', False)
    personnelId = int(data.get('PersonnelNumber', 0))
    fullName = data.get('FullName', '')
    return (validUser, personnelId, fullName)
def perform_action(self):
    self.add_product_to_basket(price=D('23.99'))
    basket = Basket.objects.first()
    basket.freeze()
    self.url = reverse('paypal-handle-order', kwargs={'basket_id': basket.id})
    url = URL().path(self.url) \
        .query_param('PayerID', 'Y8K3PSJYN24D4') \
        .query_param('token', 'EC-7F151994RW7618524')
    self.response = self.client.get(str(url), follow=True)
    self.order = Order.objects.first()
def download_template(source_uri_base, source_file, dest_dir,
                      dest_filename, is_template, *args, **kwargs):
    dest_filename = os.path.join(
        dest_dir, dest_filename if dest_filename else source_file)
    # Fetch the content before opening the destination file, so a failed
    # request does not leave an empty file behind.
    response = requests.get(
        URL(source_uri_base).add_path_segment(source_file).as_string())
    response.raise_for_status()
    if is_template:
        content = rm.InlineTemplate(response.text, **kwargs).get_content()
    else:
        content = response.text
    with open(dest_filename, 'w') as fp:
        fp.write(content)
    log.debug('Writing to %s. Content: %s', dest_filename, content)
    return dest_filename
def wait(self, url):
    """Delay the download if this host was accessed recently."""
    domain = URL(url).host()
    last_accessed = self.domains.get(domain)
    if self.delay > 0 and last_accessed is not None:
        # Use total_seconds() rather than .seconds, which only returns the
        # seconds component of the timedelta and ignores whole days.
        sleep_secs = self.delay - (datetime.now() - last_accessed).total_seconds()
        if sleep_secs > 0:
            time.sleep(sleep_secs)
    self.domains[domain] = datetime.now()
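# Hedged usage sketch: a minimal throttle built around the wait() method
# above. Throttle is a hypothetical name for the enclosing class; the
# example URLs are illustrative.
import time
from datetime import datetime
from purl import URL

class Throttle:
    def __init__(self, delay):
        self.delay = delay   # minimum seconds between hits to one host
        self.domains = {}    # host -> datetime of the last access
    wait = wait              # reuse the method defined above

t = Throttle(delay=2)
t.wait('https://example.com/a')  # first hit on this host: no sleep
t.wait('https://example.com/b')  # same host again: sleeps the remainder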
def search_fair_trade_federation(self):
    org_info = CERT_ORGS["fair trade federation"]
    url = URL(scheme=WEB_SCHEME, host=org_info["url"],
              path=org_info["search_path"],
              query=f"members_search={self.company_name}")
    # Be explicit about converting the purl URL to a string for requests
    source = requests.get(url.as_string())
    page = BeautifulSoup(source.content, 'html.parser')
    search_results = page.find_all(class_='members-box')
    # Any result at all counts as a match
    return bool(search_results)
def Just(name, search_text, url):
    keyword.add_keyword(name)
    find = keyword.extract_keywords(search_text)
    if not find:
        print(name + " not found")
        save(name, "not found")
    else:
        Urls = keyword.extract_keywords(url)
        if not Urls:
            u = URL(url)
            Get_icp(name, u)
    keyword.remove_keyword(name)
def blog_feed(request):
    """
    Proxy feeds from the blog, so they can be accessed via XHR requests.

    We also convert RSS to ATOM so that clld's javascript Feed component
    can read them.
    """
    if not request.params.get('path'):
        raise HTTPNotFound()
    path = URL(request.params['path'])
    assert not path.host()
    try:
        return atom_feed(request, request.blog.url(path.as_string()))
    except ConnectionError:  # pragma: no cover
        raise HTTPNotFound()
def __init__(self, facet, key, doc_count, request_url, selected,
             formatter=None):
    self.facet = facet
    self.key = key
    self.doc_count = doc_count
    self.request_url = URL(request_url)
    self.selected = selected
    self.show_count = True
    self.formatter = formatter
def reset_attack(request):
    """
    {
        "connection_id": "",
        "action": "",
        "timestamp": "yyyymmddhhmmss",
        "comment": ""
    }
    """
    url = URL(scheme=BIGDATA_SCHEME, host=BIGDATA_HOST,
              path='/backend/bigdata/attcksolved')
    # Be explicit about converting the purl URL to a string for requests
    requests.post(url.as_string(), data=None)
    return Response()
def __init__(self, aci_object=None, **kwargs):
    self.user = kwargs.get('user', None)
    self.password = kwargs.get('password', None)
    self.ip = str(URL(scheme='https', host=kwargs.get('ip', None)))
    self.apic_session = aci_rest.Session(self.ip, self.user, self.password)
    resp = self.apic_session.login()
    if not resp.ok:
        # Exit with a non-zero status to signal the failed login
        sys.exit(1)
    self.cmd = aci_rest.Commands(self.apic_session)
def search_spp(self):
    org_info = CERT_ORGS["SPP"]
    url = URL(scheme=WEB_SCHEME, host=org_info["url"],
              path=org_info["search_path"])
    # Be explicit about converting the purl URL to a string for requests
    source = requests.get(url.as_string(), headers=HEADERS)
    page = BeautifulSoup(source.content, 'html.parser')
    search_results = page.find_all(class_='slide-entry')
    for sr in search_results:
        result_name = sr.get('data-avia-tooltip')
        if self.is_company(result_name):
            return True
    return False
def __init__(self, host, user, password, need_auth, parent):
    self.parent = weakref.ref(parent)
    self._host = host
    self._user = user
    self._password = password
    self._need_auth = need_auth
    self._rest_session_container = RestSessionContainer()
    # purl URLs are immutable: each .path() call returns a new URL
    self._url = URL(scheme='https', host=host)
    self._auth_url = self._url.path('/api/v1.0/auth')
    self._commit_url = self._url.path('/api/v1.0/commit')
    self._rollback_url = self._url.path('/api/v1.0/rollback')
    self._check_auth_url = self._url.path('/api/v1.0/operation/chassis/epi')
    if self._need_auth:
        self._login()
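# Hedged note (a sketch, assuming purl's immutable URL API): each .path()
# call above returns a brand-new URL, so self._url itself never changes.
# The host below is hypothetical.
from purl import URL

base = URL(scheme='https', host='device.example')
auth_url = base.path('/api/v1.0/auth')
print(str(base))      # e.g. 'https://device.example' -- unchanged
print(str(auth_url))  # e.g. 'https://device.example/api/v1.0/auth'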
def by_filter(self, filter):
    # note: addGisLink doesn't work here
    url = URL('odata/Baustellen') \
        .query_param('addGisLink', 'False') \
        .query_param('$filter', filter)
    records = self.client.get(url.as_string()).get('value', ())
    records = (r for r in records if r['Internet'])
    work = [Roadwork(r) for r in records]
    work.sort(key=lambda r: r.title)
    return work
def _getInstallationPackages(self, currentVersion, packageType,
                             checkUnpublished, timeout):
    packagesUri = URL(self.apiEndPoint.value) \
        .add_path_segment('updates') \
        .add_path_segment(packageType) \
        .append_query_param('min-version_gt', currentVersion) \
        .append_query_param('include-unpublished',
                            'true' if checkUnpublished else 'false')
    availablePackages = requests.get(packagesUri.as_string(),
                                     auth=HTTPBasicAuth(self._apiUser.value,
                                                        self._apiKey.value),
                                     timeout=timeout)
    availablePackages.raise_for_status()
    return availablePackages.json()
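# Hedged sketch of the query building above (endpoint is hypothetical):
# query_param() would replace any existing values for a key, while
# append_query_param() adds a value alongside them.
from purl import URL

u = URL('https://api.example.com/updates') \
    .add_path_segment('firmware') \
    .append_query_param('min-version_gt', '1.2.0') \
    .append_query_param('include-unpublished', 'false')
print(u.as_string())
# e.g. 'https://api.example.com/updates/firmware?min-version_gt=1.2.0&include-unpublished=false'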
def parseJson(self, response):
    resjson = demjson.decode(demjson.decode(response.text))
    for res in resjson:
        if res['knowledgeType'] == 'VideoKnowledge':
            item = IlexueItem()
            siteURL = URL(res['knowledgeUrl']).scheme('http').domain(self.host)
            item['siteURL'] = siteURL.as_string()
            item['onclick'] = ''
            item['pageURL'] = response.request.url
            item['title'] = res['title']
            item['fileName'] = self.base + item['title']
            yield Request(url=item['siteURL'], meta={'item1': item},
                          callback=self.parseVideoAndDocument)
def _getValidUsersByCardIdImpl(self, timeout):
    validUsersUri = URL(self.apiEndPoint.value).add_path_segment('validpeople')
    validUsersReq = requests.get(validUsersUri.as_string(),
                                 auth=HTTPBasicAuth(self._apiUser.value,
                                                    self._apiKey.value),
                                 timeout=timeout)
    validUsersReq.raise_for_status()
    return {
        str(person['CardId']): User(person['PersonnelNumber'],
                                    person['CardId'],
                                    person['FullName'],
                                    person['Valid'],
                                    3600 * 365)
        for person in validUsersReq.json()
    }
def __call__(self, index, row):
    if index == 0:
        self.cols = {col: i for i, col in enumerate(row)}
        return row
    url = URL(row[self.cols['src']])
    try:
        for filename in url.path_segments():
            if filename in self.md:
                if self.md[filename].get('source_url'):
                    row[self.cols['source']] = self.md[filename]['source_url']
                    self.count += 1
                    break
    except IndexError:
        pass
    return row
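# Hedged sketch (example URL is hypothetical): path_segments() splits the
# path into components, which is what lets the loop above match a known
# filename anywhere in the path.
from purl import URL

u = URL('https://cdn.example.org/images/2019/photo.jpg')
print(u.path_segments())  # e.g. ('images', '2019', 'photo.jpg')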
def license_name(license_url):
    if license_url == "http://commons.wikimedia.org/wiki/GNU_Free_Documentation_License":
        return 'GNU Free Documentation License'
    if license_url == 'http://en.wikipedia.org/wiki/Public_domain':
        license_url = 'http://creativecommons.org/publicdomain/zero/1.0/'
    license_url_ = URL(license_url)
    if license_url_.host() != 'creativecommons.org':
        return license_url
    comps = license_url_.path().split('/')
    if len(comps) < 3:
        return license_url
    return {
        'zero': 'Public Domain',
    }.get(comps[2], '(CC) %s' % comps[2].upper())
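# Hedged examples for license_name (URLs are illustrative):
print(license_name('http://creativecommons.org/publicdomain/zero/1.0/'))
# -> 'Public Domain' (third path component is 'zero')
print(license_name('http://creativecommons.org/licenses/by-sa/3.0/'))
# -> '(CC) BY-SA' (third path component, upper-cased)
print(license_name('http://example.com/custom'))
# -> 'http://example.com/custom', unchanged: host is not creativecommons.org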
def by_id(self, id):
    url = URL(f'odata/Baustellen({int(id)})') \
        .query_param('addGisLink', 'True')
    work = tuple(
        Roadwork(r)
        for r in self.client.get(url.as_string()).get('value', ()))
    if work:
        return work[0]

    # Secondary lookup is against the subsections; this probably calls
    # for an index eventually.
    for r in self.roadwork:
        for section in r.sections:
            if section.id == id:
                return section
def ctx_for_url(self, url):
    """Method to reverse URL generation for resources, i.e. given a URL,
    tries to determine the associated resource.

    :return: model instance or None
    """
    mapper = self.registry.getUtility(IRoutesMapper)
    _path = URL(url).path()
    info = mapper(WebobRequest({'PATH_INFO': _path}))
    if not info['route']:
        # FIXME: hack to cater to deployments under a path prefix
        info = mapper(
            WebobRequest({'PATH_INFO': re.sub(r'^/[a-z]+', '', _path)}))
    if info['route'] and info['match']:
        for rsc in RESOURCES:
            if rsc.name == info['route'].name:
                return rsc.model.get(info['match']['id'], default=None)
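# Hedged sketch (example URL is hypothetical): URL(url).path() drops the
# scheme, host and query string, leaving just the path the routes mapper
# needs for matching.
from purl import URL

print(URL('https://clld.example/language/abcd?format=json').path())
# -> '/language/abcd'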
def extract_url(self, response):
    if len(response.all_url) > 0:
        get_domain_list = []
        get_url_list = []
        for url in response.all_url:
            if not url:
                continue
            end_fix = url[-4:]
            if end_fix in '.jpg.png.gif.rar.zip.doc.pdf.css':
                continue
            opt = URL(url)
            url_domain = opt.domain()
            if not url_domain:
                url = response.page_prefix + '/' + url
                url_domain = response.page_domain
            elif not opt.scheme():
                url = 'http://' + url
            if url_domain.find('eastmoney') == -1:
                continue
            response.pipe.get(response.spider_name + 'been_url:' + url)
            get_domain_list.append(url_domain)
            get_url_list.append(url)
        for url_domain in get_domain_list:
            response.pipe.get(response.spider_name + 'ban_host:' + url_domain)
        get_urlex_dmexp_list = response.pipe.execute()
        adv_len = len(get_url_list)
        if len(get_urlex_dmexp_list) == 0 or \
                len(get_urlex_dmexp_list) != adv_len + len(get_domain_list):
            return
        for index in range(len(get_url_list)):
            url = get_url_list[index]
            exist_flag = get_urlex_dmexp_list[index]
            if exist_flag:
                continue
            is_ban_host = get_urlex_dmexp_list[index + adv_len]
            if is_ban_host:
                continue
            response.pipe.lpush(self.redis_key, url)
        response.pipe.execute()
        return True
async def forward_request(token, request):
    """
    Takes the given token and forwards the request to the associated redirect
    (must exist in the database). Returns a web response with the same status
    code as the one returned by the redirect. If the response was successful,
    the token is deleted immediately.
    """
    token_bound = request.app.db[token]
    url = token_bound['url']
    payload = copy(dict(request.query))
    payload.update(await request.json(loads=quiet_json))
    payload['oauth_redirect_secret'] = token_bound['secret']
    async with ClientSession() as session:
        if token_bound['method'] == 'GET':
            url = URL(url)
            for key, value in payload.items():
                url = url.query_param(key, value)

            def send_request():
                return session.get(url.as_string())
        elif token_bound['method'] == 'POST':
            def send_request():
                return session.post(url, data=json.dumps(payload))
        elif token_bound['method'] == 'PUT':
            def send_request():
                return session.put(url, data=json.dumps(payload))

        async with send_request() as response:
            if 200 <= response.status < 300:
                del request.app.db[token]
                return web.HTTPFound(token_bound['success_url'])
            else:
                return web.HTTPFound(token_bound['error_url'])
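# Hedged sketch of the GET branch above: repeated query_param() calls fold
# the payload into the URL one key at a time (URL and values hypothetical).
from purl import URL

url = URL('https://callback.example/hook')
for key, value in {'code': 'abc', 'state': 'xyz'}.items():
    url = url.query_param(key, value)
print(url.as_string())  # e.g. 'https://callback.example/hook?code=abc&state=xyz'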