def login_url(cls, next_url=""):
    return_url = furl(settings.BASE_URL)
    return_url.path = "auth"
    ulb_url = furl("https://www.ulb.ac.be/commons/intranet")
    ulb_url.args["_prt"] = "ulb:gehol"
    ulb_url.args["_ssl"] = "on"
    ulb_url.args["_prtm"] = "redirect"
    ulb_url.args["_appl"] = return_url
    return ulb_url
def assert_urls_equal(url1, url2):
    furl1 = furl.furl(url1)
    furl2 = furl.furl(url2)
    for attr in ['scheme', 'host', 'port']:
        setattr(furl1, attr, None)
        setattr(furl2, attr, None)
    assert_equal(furl1, furl2)
def title_from_id(identifier_key, identifier_value):
    if identifier_key is None or identifier_value is None:
        raise AttributeError("Neither identifier key nor value were supplied")
    try:
        if identifier_key == "imdbid":
            if identifier_value[0:2] != "tt":
                identifier_value = "tt%s" % identifier_value
            url = furl("http://www.omdbapi.com").add({"i": identifier_value, "plot": "short", "r": "json"}).tostr()
            omdb = webaccess.get(url)
            return omdb.json()["Title"]
        if identifier_key not in ("rid", "tvdbid"):
            raise AttributeError("Unknown identifier %s" % identifier_key)
        tvmaze_key = "tvrage" if identifier_key == "rid" else "thetvdb"
        tvmaze = webaccess.get(furl("http://api.tvmaze.com/lookup/shows").add({tvmaze_key: identifier_value}).url)
        if tvmaze.status_code == 404:
            # Unfortunately TVMaze returns a 404 for unknown/invalid IDs
            raise ExternalApiInfoException("Unable to find id %s and value %s at TVMaze" % (identifier_key, identifier_value))
        tvmaze.raise_for_status()
        return tvmaze.json()["name"]
    except (HTTPError, ConnectionError, ReadTimeout) as e:
        logger.exception("Unable to retrieve title by id %s and value %s" % (identifier_key, identifier_value))
        raise ExternalApiInfoException(str(e))
    except Exception as e:
        logger.exception("Unable to retrieve title by id %s and value %s" % (identifier_key, identifier_value))
        raise ExternalApiInfoException(str(e))
def fetch_records(self, url):
    resp = self.requests.get(url)
    resp_xml = etree.XML(resp.content)
    num_records = int(resp_xml.xpath('//search_results/@count')[0])

    if num_records > 0:
        # create a new URL to request all results
        url = furl(url).add(query_params={
            'count': num_records
        }).url
        all_records_resp = self.requests.get(url)
        all_records_doc = etree.XML(all_records_resp.content)

        # retrieve the URLs for each document to make requests for their full content
        record_urls = [
            furl(record.xpath('url/node()')[0]).set(query_params={
                'displayxml': 'true'
            }).url
            for record in all_records_doc.xpath('//clinical_study')
        ]

        total = len(record_urls)
        for i, url in enumerate(record_urls):
            logger.debug('[%d / %d] Requesting %s', i, total, url)
            record_resp = self.requests.get(url)
            doc = etree.fromstring(record_resp.content, parser=etree.XMLParser(recover=True))
            yield doc.xpath('//nct_id/node()')[0], etree.tostring(doc, encoding=str)
def addon_view_file(auth, node, file_node, version):
    # TODO: resolve circular import issue
    from website.addons.wiki import settings as wiki_settings

    if isinstance(version, tuple):
        version, error = version
        error = error.replace('\n', '').strip()
    else:
        error = None

    ret = serialize_node(node, auth, primary=True)

    if file_node._id + '-' + version._id not in node.file_guid_to_share_uuids:
        node.file_guid_to_share_uuids[file_node._id + '-' + version._id] = uuid.uuid4()
        node.save()

    if ret['user']['can_edit']:
        sharejs_uuid = str(node.file_guid_to_share_uuids[file_node._id + '-' + version._id])
    else:
        sharejs_uuid = None

    download_url = furl.furl(request.url.encode('utf-8')).set(args=dict(request.args, **{
        'direct': None,
        'mode': 'render',
        'action': 'download',
    }))

    render_url = furl.furl(settings.MFR_SERVER_URL).set(
        path=['render'],
        args={'url': download_url.url}
    )

    ret.update({
        'urls': {
            'render': render_url.url,
            'mfr': settings.MFR_SERVER_URL,
            'sharejs': wiki_settings.SHAREJS_URL,
            'gravatar': get_gravatar(auth.user, 25),
            'files': node.web_url_for('collect_file_trees'),
            'archived_from': get_archived_from_url(node, file_node) if node.is_registration else None,
        },
        'error': error,
        'file_name': file_node.name,
        'file_name_title': os.path.splitext(file_node.name)[0],
        'file_name_ext': os.path.splitext(file_node.name)[1],
        'file_path': file_node.path,
        'sharejs_uuid': sharejs_uuid,
        'provider': file_node.provider,
        'materialized_path': file_node.materialized_path,
        'extra': version.metadata.get('extra', {}),
        'size': version.size if version.size is not None else 9966699,
        'private': getattr(node.get_addon(file_node.provider), 'is_private', False),
        'file_tags': [tag._id for tag in file_node.tags],
        'file_guid': file_node.get_guid()._id,
        'file_id': file_node._id,
        'allow_comments': file_node.provider in settings.ADDONS_COMMENTABLE
    })

    ret.update(rubeus.collect_addon_assets(node))
    return ret
def switch_to_test_mode(self, instance_number=None):
    mongo_url = furl(self.settings.get('mongo', 'url'))
    server_port = self.settings.getint('test', 'server_port')
    server_url = furl(self.settings.get('test', 'server_url'))

    if instance_number is not None:
        mongo_url.path.segments[0] = "test_%d_%s" % (instance_number, mongo_url.path.segments[0])
        server_port += instance_number
        if not server_url.port or server_url.port != self.settings.getint('test', 'server_port'):
            raise Exception("Can't detect how to adjust server url for instance: %d" % instance_number)
        server_url.port = server_port
    else:
        mongo_url.path.segments[0] = "test_%s" % mongo_url.path.segments[0]

    self.settings.set('mongo', 'url', str(mongo_url))
    self.settings.set('server', 'port', str(server_port))
    self.settings.set('server', 'url', str(server_url))
    self.settings.set('ratelimit_authentication', 'allowed_failures', '10000')

    @self.route('/__test_drop_mongoengine_cache__')
    def endpoint():
        self.logger.debug("Received /__test_drop_mongoengine_cache__ request, dropping mongoengine cached collections/connections")
        self.drop_mongoengine_cached_handles()
        return ''

    self.in_test_mode = True
    self.init_application()
def get_url_args(doc, defaults=None):
    """Return url args recovered from the django_full_path cookie in the
    bokeh request header. If default values are provided, they overwrite
    the defaults obtained from the API.
    """
    args = get_data('defaults')
    # overwrite api default values
    if defaults:
        for key in defaults:
            args[key] = defaults[key]

    r = doc().session_context.request
    if r:
        if 'django_full_path' in r.cookies:
            django_full_path = r.cookies['django_full_path'].value
            tmp = furl(django_full_path).args
            for key in tmp:
                # overwrite default values with those passed as url args,
                # making sure the url arg (key) is valid
                if key in args:
                    args[key] = tmp[key]
            # the bokeh app name is the second segment of the url path
            args['bokeh_app'] = furl(django_full_path).path.segments[1]

    return args
def get_featureOfInterest(query_uri_base, aws_urn=None):
    # assemble SOS query string for one or all stations
    q = None
    if aws_urn is not None:
        q = furl(query_uri_base + '/service').add({
            'service': 'SOS',
            'version': '2.0.0',
            'request': 'GetFeatureOfInterest',
            'featureOfInterest': aws_urn
        }).url
    else:
        q = furl(query_uri_base + '/sos/kvp').add({
            'service': 'SOS',
            'version': '2.0.0',
            'request': 'GetFeatureOfInterest',
        }).url

    # run the query request
    creds = json.load(open('creds.json'))
    auth = HTTPProxyAuth(creds['username'], creds['password'])
    ga_proxy = {"http": creds['proxy']}
    headers = {'accept': 'application/json'}
    r = requests.get(q, headers=headers, proxies=ga_proxy, auth=auth)
    results = json.loads(r.text)

    # return one or all
    if aws_urn is not None:
        return results['featureOfInterest'][0]
    else:
        # return sorted(results['featureOfInterest'], key=lambda k: k['name'])
        return sorted(results['featureOfInterest'])
def fetch_records(self, url, end_day):
    page, detail = 0, None

    while True:
        page += 1
        resp = self.requests.get(furl(url).add(query_params={
            'page': page,
        }).url)

        if resp.status_code == 422:
            # We've asked for too much. Time to readjust date range
            # Thanks for leaking variables python
            page, url = 0, furl(url).add(query_params={
                'modified_date': pendulum.parse(detail['modified_date']).date().isoformat()
            })
            continue

        for item in resp.json():
            resp = self.requests.get(item['url'])
            detail = resp.json()

            if pendulum.parse(detail['modified_date']).date() > end_day:
                return

            yield item['url'], detail

        if len(resp.json()) < self.page_size:
            return  # We've hit the end of our results
def make_query(url, page):
    if page != 1:
        return furl(url).remove(['page']).add({"page": page}).url.split('?')[1]
    try:
        return furl(url).remove(['page']).url.split('?')[1]
    except IndexError:
        return ""
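A minimal usage sketch of make_query above; the example URLs are assumptions, not taken from the original source:

# Assumes make_query (and furl) are importable as defined above.
print(make_query("http://example.com/list?sort=name&page=3", 2))  # -> "sort=name&page=2"
print(make_query("http://example.com/list", 1))                   # -> "" (no query string to preserve)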
def fetch_records(self, url):
    resp = self.requests.get(url)
    resp_xml = etree.XML(resp.content)
    num_records = int(resp_xml.xpath('//search_results/@count')[0])

    if num_records > 0:
        # create a new URL to request all results
        url = furl(url).add(query_params={
            'count': num_records
        }).url
        all_records_resp = self.requests.get(url)
        all_records_doc = etree.XML(all_records_resp.content)

        # retrieve the URLs for each document to make requests for their full content
        record_urls = [
            furl(record.xpath('url/node()')[0]).set(query_params={
                'displayxml': 'true'
            }).url
            for record in all_records_doc.xpath('//clinical_study')
        ]

        logger.info("There are {} record urls to harvest - this may take a while...".format(len(record_urls)))
        for url in record_urls:
            try:
                record_resp = self.requests.get(url)
            except self.requests.exceptions.ConnectionError as e:
                logger.warning('Connection error: {}, wait a bit...'.format(e))
                time.sleep(30)
                record_resp = self.requests.get(url)

            doc = etree.XML(record_resp.content)
            record = etree.tostring(doc)
            doc_id = doc.xpath('//nct_id/node()')[0]
            yield (doc_id, record)
def test_remove(self):
    url = 'http://host:69/a/big/path/?a=a&b=b&s=s+s#a frag?with=args&a=a'

    fu = furl.furl(url)
    assert fu == fu.remove(fragment=True, args=['a', 'b'], path='path', port=True)
    assert fu.url == 'http://host/a/big/?s=s+s'

    # No errors are thrown when removing url components that don't exist.
    fu = furl.furl(url)
    assert fu == fu.remove(fragment_path=['asdf'], fragment_args=['asdf'], args=['asdf'], path=['ppp', 'ump'])
    assert self._param(fu.url, 'a', 'a')
    assert self._param(fu.url, 'b', 'b')
    assert self._param(fu.url, 's', 's s')
    assert fu.pathstr == '/a/big/path/'
    assert fu.fragment.pathstr == 'a frag'
    assert fu.fragment.args == {'a': 'a', 'with': 'args'}

    # Path as a list of paths to join before removing.
    assert fu == fu.remove(fragment_path='a frag', fragment_args=['a'], query_params=['a', 'b'], path=['big', 'path'], port=True)
    assert fu.url == 'http://host/a/?s=s+s#with=args'

    assert fu == fu.remove(path=True, query=True, fragment=True)
    assert fu.url == 'http://host'
def download(
    urls: List[Tuple[str, Union[str, None]]], verbose: bool = False, force: bool = False
) -> None:
    for address, filename in urls:
        if not address:
            continue
        try:
            host = ".".join(furl(address).host.split(".")[-2:])
            try:
                Story = AVAILABLE_SITES[host]
                story = Story(furl(address), verbose)
                story.force = force
                if filename:
                    story.filename = filename
                story.run()
            except KeyError:
                click.echo(
                    f"{__file__} is currently only able to download from {list2text(AVAILABLE_SITES.keys())}."
                )
        except AttributeError as e:
            print(e)
            error = "There were problems with parsing the URL."
            with open("pyffdl.log", "a") as fp:
                click.echo(error, file=fp)
            click.echo(error, err=True)
def build_uri(self, uri, start, end, width='*', height='*',
              composite_to='*.*', bg_url=None, bg_width='*', bg_height='*',
              **kwds):
    """Create a cropped URL in Akamai

    >>> crop = AkamaiCrop()
    >>> crop.build_uri(
    ...     'https://example.com/test.jpg',
    ...     Coord(10, 20),
    ...     Coord(30, 40),
    ... )
    'https://example.com/test.jpg?crop=30:40%3B10,20'
    """
    furl_obj = furl.furl(uri)
    furl_obj.args['crop'] = '{}:{};{},{}'.format(
        (end.x - start.x), (end.y - end.x), start.x, start.y)
    furl_obj.args['resize'] = '{}:{}'.format(width, height)
    akamai_url = furl_obj.url

    if bg_url:
        furl_obj.args['composite-to'] = composite_to
        bg_furl_obj = furl.furl(bg_url)
        bg_furl_obj.args['resize'] = '{}:{}'.format(bg_width, bg_height)
        akamai_url += ('|' + bg_furl_obj.url)

    return akamai_url
def parse_i18n(
    url,
    language_codes,
    default_language_code=None,
):
    """
    Takes a url containing a "*" character and creates an index per language,
    replacing the "*" with the language code, except for the default language.

    :param url:
    :return:
    """
    if '*' not in furl(url).path.segments[0]:
        index_name = furl(url).path.segments[0]
        raise Exception(
            (
                'The index name in the haystack url {} is not supported. Must '
                'have a * in its name for multilingual index support'
            ).format(index_name)
        )

    connections = {}
    for language_code in language_codes:
        if default_language_code and language_code == default_language_code:
            connections['default'] = parse(url, suffix='default')
        else:
            connections[language_code] = parse(url, suffix=language_code)

    return connections
def get_root_url():
    f = furl()
    f.scheme = request.scheme
    f.host = furl(request.host_url).host
    f.port = config.settings.main.port
    if config.settings.main.urlBase:
        f.path = config.settings.main.urlBase
    return str(f) + "/"
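Hedged sketch of the same furl assembly used in get_root_url; the scheme, host, port, and path values are assumptions rather than the real configuration:

from furl import furl

f = furl()
f.scheme = "http"
f.host = "127.0.0.1"   # assumed host, not the real request.host_url
f.port = 5075          # assumed non-default port, so it is kept in the URL
f.path = "nzbhydra"    # assumed urlBase
print(str(f) + "/")    # -> http://127.0.0.1:5075/nzbhydra/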
def __init__(self, url, params=None):
    self._url = url
    self._furl = furl.furl(url)
    self._params = furl.furl(url).args
    self._furl.set(args={})
    params = params or {}
    for (k, v) in params.items():
        self._params.add(k, v)
def get_details_link(self, guid):
    if "nzbgeek" in self.settings.host:
        f = furl(self.settings.host)
    else:
        f = furl(self.settings.host.replace("api.", "www."))  # Quick and dirty fix so it doesn't link to the API
    f.path.add("details")
    f.path.add(guid)
    return f.url
def __init__(self, total, page=1, per_page=10, list_count=10, base_uri=None):
    self.total = total
    self.list_count = list_count
    self.page = page
    self.per_page = per_page
    if base_uri:
        self.uri = furl(base_uri)
    else:
        self.uri = furl('')
    self.calculate()
def test_auth_download(self):
    url = self.build_url()
    res = self.test_app.get(url)
    assert_equal(res.json['auth'], views.make_auth(self.user))
    assert_equal(res.json['credentials'], self.node_addon.serialize_waterbutler_credentials())
    assert_equal(res.json['settings'], self.node_addon.serialize_waterbutler_settings())
    expected_url = furl.furl(self.node.api_url_for('create_waterbutler_log', _absolute=True))
    observed_url = furl.furl(res.json['callback_url'])
    observed_url.port = expected_url.port
    assert_equal(expected_url, observed_url)
def create_file(request, uri, headers):
    folder_path = furl(uri).args['path']
    # folder_name = folder_path.split('/')[1]
    # provider_name = furl(uri).args['path']
    nid = furl(uri).args['nid']
    provider = session.query(File).filter(File.parent == None and File.node_id == nid).one()
    new_file = create_new_file(provider)
    resp = json.dumps({
        'data': new_file.as_dict()
    })
    return (200, headers, resp)
def assert_urls_equal(url1, url2):
    furl1 = furl.furl(url1)
    furl2 = furl.furl(url2)
    for attr in ['scheme', 'host', 'port']:
        setattr(furl1, attr, None)
        setattr(furl2, attr, None)
    # Note: furl params are ordered and cause trouble
    assert_equal(dict(furl1.args), dict(furl2.args))
    furl1.args = {}
    furl2.args = {}
    assert_equal(furl1, furl2)
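Illustrative sketch of why the helper above clears scheme/host/port and compares args as dicts; the example URLs are assumptions. Comparing furl objects directly compares the full URL strings, so a different origin or a different query-parameter order makes otherwise equivalent URLs unequal:

import furl

a = furl.furl('http://localhost:5000/v2/nodes/?page=2&sort=title')
b = furl.furl('https://localhost/v2/nodes/?sort=title&page=2')
assert dict(a.args) == dict(b.args)  # same parameters despite different order
assert a.url != b.url                # direct comparison trips over scheme, port, and param order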
def test_auth_download(self):
    url = self.build_url()
    res = self.app.get(url, auth=self.user.auth)
    data = jwt.decode(
        jwe.decrypt(res.json['payload'].encode('utf-8'), self.JWE_KEY),
        settings.WATERBUTLER_JWT_SECRET,
        algorithm=settings.WATERBUTLER_JWT_ALGORITHM
    )['data']
    assert_equal(data['auth'], views.make_auth(self.user))
    assert_equal(data['credentials'], self.node_addon.serialize_waterbutler_credentials())
    assert_equal(data['settings'], self.node_addon.serialize_waterbutler_settings())
    expected_url = furl.furl(self.node.api_url_for('create_waterbutler_log', _absolute=True, _internal=True))
    observed_url = furl.furl(data['callback_url'])
    observed_url.port = expected_url.port
    assert_equal(expected_url, observed_url)
def addon_view_file(auth, node, file_node, version):
    # TODO: resolve circular import issue
    from website.addons.wiki import settings as wiki_settings

    if isinstance(version, tuple):
        version, error = version
        error = error.replace("\n", "").strip()
    else:
        error = None

    ret = serialize_node(node, auth, primary=True)

    if file_node._id not in node.file_guid_to_share_uuids:
        node.file_guid_to_share_uuids[file_node._id] = uuid.uuid4()
        node.save()

    if ret["user"]["can_edit"]:
        sharejs_uuid = str(node.file_guid_to_share_uuids[file_node._id])
    else:
        sharejs_uuid = None

    download_url = furl.furl(request.url.encode("utf-8")).set(
        args=dict(request.args, **{"direct": None, "mode": "render", "action": "download"})
    )
    render_url = furl.furl(settings.MFR_SERVER_URL).set(path=["render"], args={"url": download_url.url})

    ret.update(
        {
            "urls": {
                "render": render_url.url,
                "mfr": settings.MFR_SERVER_URL,
                "sharejs": wiki_settings.SHAREJS_URL,
                "gravatar": get_gravatar(auth.user, 25),
                "files": node.web_url_for("collect_file_trees"),
            },
            "error": error,
            "file_name": file_node.name,
            "file_name_title": os.path.splitext(file_node.name)[0],
            "file_name_ext": os.path.splitext(file_node.name)[1],
            "file_path": file_node.path,
            "sharejs_uuid": sharejs_uuid,
            "provider": file_node.provider,
            "materialized_path": file_node.materialized_path,
            "extra": version.metadata.get("extra", {}),
            "size": version.size if version.size is not None else 9966699,
            "private": getattr(node.get_addon(file_node.provider), "is_private", False),
            "file_tags": [tag._id for tag in file_node.tags],
        }
    )

    ret.update(rubeus.collect_addon_assets(node))
    return ret
def addon_view_or_download_file(auth, path, provider, **kwargs):
    extras = request.args.to_dict()
    action = extras.get('action', 'view')
    node = kwargs.get('node') or kwargs['project']

    node_addon = node.get_addon(provider)

    if not path:
        raise HTTPError(httplib.BAD_REQUEST)

    if not node_addon:
        raise HTTPError(httplib.BAD_REQUEST, {
            'message_short': 'Bad Request',
            'message_long': 'The add-on containing this file is no longer connected to the {}.'.format(node.project_or_component)
        })

    if not node_addon.has_auth:
        raise HTTPError(httplib.UNAUTHORIZED, {
            'message_short': 'Unauthorized',
            'message_long': 'The add-on containing this file is no longer authorized.'
        })

    if not node_addon.complete:
        raise HTTPError(httplib.BAD_REQUEST, {
            'message_short': 'Bad Request',
            'message_long': 'The add-on containing this file is no longer configured.'
        })

    if not path.startswith('/'):
        path = '/' + path

    guid_file, created = node_addon.find_or_create_file_guid(path)

    if guid_file.guid_url != request.path:
        guid_url = furl.furl(guid_file.guid_url)
        guid_url.args.update(extras)
        return redirect(guid_url)

    guid_file.maybe_set_version(**extras)

    if request.method == 'HEAD':
        download_url = furl.furl(guid_file.download_url)
        download_url.args.update(extras)
        download_url.args['accept_url'] = 'false'
        return make_response(('', 200, {'Location': download_url.url}))

    if action == 'download':
        download_url = furl.furl(guid_file.download_url)
        download_url.args.update(extras)
        return redirect(download_url.url)

    return addon_view_file(auth, node, node_addon, guid_file, extras)
def testUrlGeneration(self):
    w = NzbClub(config.indexerSettings.nzbclub)
    self.args = SearchRequest(query="a showtitle", season=1, episode=2)
    urls = w.get_showsearch_urls(self.args)
    self.assertEqual(1, len(urls))
    print(urls[0])
    self.assertEqual('a showtitle s01e02 or a showtitle 1x02', furl(urls[0]).args["q"])

    self.args = SearchRequest(query="a showtitle", season=1, episode=None)
    urls = w.get_showsearch_urls(self.args)
    self.assertEqual(1, len(urls))
    self.assertEqual('a showtitle s01 or a showtitle "season 1"', furl(urls[0]).args["q"])
def test_auth_bad_cookie(self):
    url = self.build_url(cookie=self.cookie)
    res = self.app.get(url, expect_errors=True)
    assert_equal(res.status_code, 200)
    data = jwt.decode(res.json, settings.WATERBUTLER_JWT_SECRET, algorithm=settings.WATERBUTLER_JWT_ALGORITHM)['data']
    assert_equal(data['auth'], views.make_auth(self.user))
    assert_equal(data['credentials'], self.node_addon.serialize_waterbutler_credentials())
    assert_equal(data['settings'], self.node_addon.serialize_waterbutler_settings())
    expected_url = furl.furl(self.node.api_url_for('create_waterbutler_log', _absolute=True))
    observed_url = furl.furl(data['callback_url'])
    observed_url.port = expected_url.port
    assert_equal(expected_url, observed_url)
def testUrlGeneration(self):
    w = Binsearch(getIndexerSettingByName("binsearch"))
    self.args = SearchRequest(query="a showtitle", season=1, episode=2)
    urls = w.get_showsearch_urls(self.args)
    self.assertEqual(2, len(urls))
    self.assertEqual('a showtitle s01e02', furl(urls[0]).args["q"])
    self.assertEqual('a showtitle 1x02', furl(urls[1]).args["q"])

    self.args = SearchRequest(query="a showtitle", season=1, episode=None)
    urls = w.get_showsearch_urls(self.args)
    self.assertEqual(2, len(urls))
    self.assertEqual('a showtitle s01', furl(urls[0]).args["q"])
    self.assertEqual('a showtitle "season 1"', furl(urls[1]).args["q"])
def process_formdata(self, valuelist):
    """
    Process data received over the wire from a form.

    This will be called during form construction with data supplied
    through the `formdata` argument.

    :param valuelist: A list of strings to process.
    """
    if valuelist:
        self.data = furl.furl(valuelist[0])
    else:
        self.data = furl.furl('')
def fetch_records(self, url, start_date, end_date):
    count, page = 0, 0
    resp = self.requests.get(furl(url).set(query_params={'page': page}))
    total = BeautifulSoup(resp.content, 'html.parser').find(id='page-title').text.split(' ')[0].strip().replace(',', '')

    try:
        total = int(total)
    except ValueError:
        # Handle the case of "No" results
        assert total == 'No'
        total = 0

    logging.info('Found %d results from biorxiv', total)

    while count < total:
        links = re.findall(b'href="(/content/early/[^"]+?/[^"]+)"', resp.content)
        logger.info('On document %d of %d (%d%%)', count, total, (count / total) * 100)

        for link in links:
            article = self.requests.get('http://biorxiv.org' + link.decode())
            if article.status_code // 100 != 2:
                logger.warning('Got non-200 status %s from %s', article, link)
                continue
            article.raise_for_status()
            soup = BeautifulSoup(article.content, 'lxml')

            data = {
                'subject-areas': [
                    subject.a.text.strip()
                    for subject in soup.find_all(**{'class': 'highwire-article-collection-term'})
                ]
            }

            for meta in BeautifulSoup(article.content, 'lxml').find_all('meta'):
                if 'name' not in meta.attrs:
                    continue
                if meta.attrs['name'] in data:
                    if not isinstance(data[meta.attrs['name']], list):
                        data[meta.attrs['name']] = [data[meta.attrs['name']]]
                    data[meta.attrs['name']].append(meta.attrs['content'])
                else:
                    data[meta.attrs['name']] = meta.attrs['content']

            count += 1
            yield link.decode(), data

        page += 1
        resp = self.requests.get(furl(url).set(query_params={'page': page}))
def get_logout_url(self, service_url):
    url = furl.furl(self.BASE_URL)
    url.path.segments.append('logout')
    url.args['service'] = service_url
    return url.url
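A hedged usage sketch of the path/args pattern above; the base and service URLs are assumptions, not the real CAS endpoints:

import furl

url = furl.furl('https://accounts.example.org')   # assumed BASE_URL
url.path.segments.append('logout')
url.args['service'] = 'https://app.example.org/goodbye'
print(url.url)  # https://accounts.example.org/logout?service=... (service URL percent-encoded as needed)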
TRANSCRIPT = re.compile('Earnings Call Transcript')

next_page = True
page = 1
driver = webdriver.Firefox()
while next_page:
    print(f'Page: {page}')
    url = f'{SA_URL}/earnings/earnings-call-transcripts/{page}'
    driver.get(urljoin(SA_URL, url))
    response = driver.page_source
    page += 1
    soup = BeautifulSoup(response, 'lxml')
    links = soup.find_all(name='a', string=TRANSCRIPT)
    if len(links) == 0:
        next_page = False
    else:
        for link in links:
            transcript_url = link.attrs.get('href')
            article_url = furl(urljoin(SA_URL, transcript_url)).add({'part': 'single'})
            driver.get(article_url.url)
            html = driver.page_source
            result = parse_html(html)
            if result is not None:
                meta, participants, content = result
                meta['link'] = link
                store_result(meta, participants, content)
            sleep(5 + (random() - .5) * 2)

driver.close()
# pd.Series(articles).to_csv('articles.csv')
def clone_repository_cached(session, execution, destination):
    # type: (Session, ExecutionInfo, Path) -> Tuple[VCS, RepoInfo]
    """
    Clone a remote repository.

    :param execution: execution info
    :param destination: directory to clone to (in which a directory for the repository will be created)
    :param session: program session
    :return: repository information
    :raises: CommandFailedError if git/hg is not installed
    """
    # mock lock
    repo_lock = Lock()
    repo_lock_timeout_sec = 300
    repo_url = execution.repository or ''  # type: str
    parsed_url = furl(repo_url)
    no_password_url = parsed_url.copy().remove(password=True).url

    clone_folder_name = Path(str(furl(repo_url).path)).name  # type: str
    clone_folder = Path(destination) / clone_folder_name

    standalone_mode = session.config.get("agent.standalone_mode", False)
    if standalone_mode:
        cached_repo_path = clone_folder
    else:
        vcs_cache_path = Path(session.config["agent.vcs_cache.path"]).expanduser()
        repo_hash = md5(ensure_binary(repo_url)).hexdigest()
        # create lock
        repo_lock = FileLock(filename=(vcs_cache_path / '{}.lock'.format(repo_hash)).as_posix())
        # noinspection PyBroadException
        try:
            repo_lock.acquire(timeout=repo_lock_timeout_sec)
        except BaseException:
            print('Could not lock cache folder "{}" (timeout {} sec), using temp vcs cache.'.format(
                clone_folder_name, repo_lock_timeout_sec))
            repo_hash = '{}_{}'.format(repo_hash, str(random()).replace('.', ''))
            # use mock lock for the context
            repo_lock = Lock()
        # select vcs cache folder
        cached_repo_path = vcs_cache_path / "{}.{}".format(clone_folder_name, repo_hash) / clone_folder_name

    with repo_lock:
        vcs = VcsFactory.create(
            session, execution_info=execution, location=cached_repo_path
        )
        if not find_executable(vcs.executable_name):
            raise CommandFailedError(vcs.executable_not_found_error_help())

        if not standalone_mode:
            if session.config["agent.vcs_cache.enabled"] and cached_repo_path.exists():
                print('Using cached repository in "{}"'.format(cached_repo_path))
            else:
                print("cloning: {}".format(no_password_url))
                rm_tree(cached_repo_path)
                # We clone the entire repository, not a specific branch
                vcs.clone()  # branch=execution.branch)

            vcs.pull()
            rm_tree(destination)
            shutil.copytree(Text(cached_repo_path), Text(clone_folder))
            if not clone_folder.is_dir():
                raise CommandFailedError(
                    "copying of repository failed: from {} to {}".format(
                        cached_repo_path, clone_folder
                    )
                )

    # checkout in the newly copy destination
    vcs.location = Text(clone_folder)
    vcs.checkout()

    repo_info = vcs.get_repository_copy_info(clone_folder)

    # make sure we have no user/pass in the returned repository structure
    repo_info = attr.evolve(repo_info, url=no_password_url)

    return vcs, repo_info
def _get_prefix_from_bucket_config(self, config):
    prefix = furl.furl(scheme="gs", netloc=config.bucket, path=config.subdir)
    return str(prefix)
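Minimal sketch of the same constructor-keyword form of furl used above; the bucket and subdir values are assumptions:

import furl

# Build a gs:// prefix directly from components instead of string concatenation.
prefix = furl.furl(scheme='gs', netloc='my-bucket', path='models/checkpoints')
print(str(prefix))  # gs://my-bucket/models/checkpoints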
def server_url(self) -> furl:
    return furl().set(scheme=self.scheme, host=self.host, port=self.server_port, path=self.imposters_path)
LOGOUT_REDIRECT_URL = "/"

# Database
# https://docs.djangoproject.com/en/2.0/ref/settings/#databases
DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": path.join(BASE_DIR, "db.sqlite3"),
    }
}

# Change 'default' database configuration with $DATABASE_URL.
DATABASES["default"].update(
    dj_database_url.config(
        env="DATABASE_URL",
        conn_max_age=env.int("DATABASE_CONN_MAX_AGE", 500),
        ssl_require="sslmode" not in furl(env("DATABASE_URL", "")).args,
    ))

# work-around for dj-database-url: explicitly disable ssl for sqlite
if DATABASES["default"].get("ENGINE") == "django.db.backends.sqlite3":
    DATABASES["default"].get("OPTIONS", {}).pop("sslmode", None)

# work-around for dj-database-url: patch ssl for mysql
if DATABASES["default"].get("ENGINE") == "django.db.backends.mysql":
    DATABASES["default"].get("OPTIONS", {}).pop("sslmode", None)
    if env("MYSQL_SSL_CA", None):
        DATABASES["default"].setdefault("OPTIONS", {}).setdefault("ssl", {}).setdefault(
            "ca", env("MYSQL_SSL_CA", None))

# default to a sensible modern driver for Azure SQL
def get_url(self, url='url', **keys):
    parsed = furl.furl(self.credentials.get(url, ''))
    for key, value in keys.items():
        setattr(parsed, key, self.credentials.get(value))
    return parsed.url
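Hedged sketch of overriding URL components with setattr, as get_url does above; the credential keys and values here are assumptions for illustration only:

import furl

parsed = furl.furl('https://db.example.org:5432/postgres')
for key, value in {'username': 'alice', 'password': 's3cret'}.items():
    setattr(parsed, key, value)  # furl exposes username/password/host/port/... as settable attributes
print(parsed.url)  # https://alice:s3cret@db.example.org:5432/postgres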
def addon_view_file(auth, node, file_node, version):
    # TODO: resolve circular import issue
    from website.addons.wiki import settings as wiki_settings

    if isinstance(version, tuple):
        version, error = version
        error = error.replace('\n', '').strip()
    else:
        error = None

    ret = serialize_node(node, auth, primary=True)

    if file_node._id not in node.file_guid_to_share_uuids:
        node.file_guid_to_share_uuids[file_node._id] = uuid.uuid4()
        node.save()

    if ret['user']['can_edit']:
        sharejs_uuid = str(node.file_guid_to_share_uuids[file_node._id])
    else:
        sharejs_uuid = None

    download_url = furl.furl(request.url.encode('utf-8')).set(
        args=dict(request.args, **{
            'direct': None,
            'mode': 'render',
            'action': 'download',
        }))

    render_url = furl.furl(settings.MFR_SERVER_URL).set(
        path=['render'],
        args={'url': download_url.url})

    ret.update({
        'urls': {
            'render': render_url.url,
            'mfr': settings.MFR_SERVER_URL,
            'sharejs': wiki_settings.SHAREJS_URL,
            'gravatar': get_gravatar(auth.user, 25),
            'files': node.web_url_for('collect_file_trees'),
        },
        'error': error,
        'file_name': file_node.name,
        'file_name_title': os.path.splitext(file_node.name)[0],
        'file_name_ext': os.path.splitext(file_node.name)[1],
        'file_path': file_node.path,
        'sharejs_uuid': sharejs_uuid,
        'provider': file_node.provider,
        'materialized_path': file_node.materialized_path,
        'extra': version.metadata.get('extra', {}),
        'size': version.size if version.size is not None else 9966699,
        'private': getattr(node.get_addon(file_node.provider), 'is_private', False),
        'file_tags': [tag._id for tag in file_node.tags],
        'file_guid': file_node.get_guid()._id,
        'file_id': file_node._id,
        'allow_comments': file_node.provider in settings.ADDONS_COMMENTABLE
    })

    ret.update(rubeus.collect_addon_assets(node))
    return ret
def update(self):
    """
    Downloads the latest source tarball from github and installs it over the existing version.
    """
    base_url = furl(self.repositoryBase)
    base_url.path.add(self.repository)
    base_url.path.add("tarball")
    base_url.path.add(self.branch)
    tar_download_url = base_url.url
    main_dir = os.path.dirname(os.path.dirname(__file__))

    try:
        self.backup()

        # prepare the update dir
        update_dir = os.path.join(main_dir, 'update')

        if os.path.isdir(update_dir):
            logger.info("Clearing out update folder " + update_dir + " before extracting")
            shutil.rmtree(update_dir)

        logger.info("Creating update folder " + update_dir + " before extracting")
        os.makedirs(update_dir)

        # retrieve file
        logger.info("Downloading update from " + repr(tar_download_url))
        tar_download_path = os.path.join(update_dir, 'sb-update.tar')
        response = webaccess.get(tar_download_url, stream=True)  # Apparently SSL causes problems on some systems (#138)
        with open(tar_download_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        del response

        if not os.path.isfile(tar_download_path):
            logger.error("Unable to retrieve new version from " + tar_download_url + ", can't update")
            return False

        if not tarfile.is_tarfile(tar_download_path):
            logger.error("Retrieved version from " + tar_download_url + " is corrupt, can't update")
            return False

        # extract to sb-update dir
        logger.info("Extracting update file " + tar_download_path)
        tar = tarfile.open(tar_download_path)
        tar.extractall(update_dir)
        tar.close()

        # delete .tar.gz
        logger.info("Deleting update file " + tar_download_path)
        os.remove(tar_download_path)

        # find update dir name
        update_dir_contents = [x for x in os.listdir(update_dir) if os.path.isdir(os.path.join(update_dir, x))]
        if len(update_dir_contents) != 1:
            logger.error("Invalid update data, update failed: " + str(update_dir_contents))
            return False
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        # walk temp folder and move files to main folder
        logger.info("Moving files from " + content_dir + " to " + main_dir)
        for dirname, dirnames, filenames in os.walk(content_dir):
            dirname = dirname[len(content_dir) + 1:]
            for curfile in filenames:
                old_path = os.path.join(content_dir, dirname, curfile)
                new_path = os.path.join(main_dir, dirname, curfile)

                if os.path.isfile(new_path):
                    os.remove(new_path)
                os.renames(old_path, new_path)
    except Exception as e:
        logger.error("Error while trying to update: " + str(e))
        return False

    logger.info("Update successful")
    return True
def validate_referer(referer):
    if furl.furl(referer).host != furl.furl(request.url).host:
        raise ValidationError('Invalid referer.')
def __init__(self, url, client_info=None, timeout=15):
    client.Client.__init__(self, url, client_info, timeout)
    self.base_url = furl(url)
def solr_request(path: str,
                 params: SolrParams = None,
                 content: Union[str, SolrParams] = None,
                 content_type: Optional[str] = None,
                 config: Optional[CommonConfig] = None) -> str:
    """
    Send a request to Solr.

    :param path: Solr path to call, e.g. 'select'.
    :param params: Query parameters to add to the path.
    :param content: String or dictionary content to send via POST request.
    :param content_type: Content-Type for the POST content.
    :param config: (testing) Configuration object
    :return: Raw response content on success, raise exception on error.
    """
    path = decode_object_from_bytes_if_needed(path)
    params = decode_object_from_bytes_if_needed(params)
    content = decode_object_from_bytes_if_needed(content)
    content_type = decode_object_from_bytes_if_needed(content_type)

    if not path:
        raise McSolrRequestInvalidParamsException("Path is unset.")

    if params:
        if not isinstance(params, dict):
            raise McSolrRequestInvalidParamsException(f"Params is not a dictionary: {params}")

    if content:
        if not (isinstance(content, str) or isinstance(content, dict)):
            raise McSolrRequestInvalidParamsException(f"Content is not a string not a dictionary: {content}")

    if not config:
        config = CommonConfig()

    solr_url = config.solr_url()

    if not params:
        params = {}

    abs_uri = furl(f"{solr_url}/mediacloud/{path}")
    abs_uri = abs_uri.set(params)
    abs_url = str(abs_uri)

    ua = UserAgent()
    ua.set_timeout(__QUERY_HTTP_TIMEOUT)
    ua.set_max_size(None)

    # Remediate CVE-2017-12629
    q_param = str(params.get('q', ''))
    if 'xmlparser' in q_param.lower():
        raise McSolrRequestQueryErrorException("XML queries are not supported.")

    # Solr might still be starting up so wait for it to expose the collections list
    __wait_for_solr_to_start(config=config)

    if content:

        if not content_type:
            fallback_content_type = 'text/plain; charset=utf-8'
            log.warning(f"Content-Type is not set; falling back to '{fallback_content_type}'")
            content_type = fallback_content_type

        if isinstance(content, dict):
            content = urlencode(content, doseq=True)

        content_encoded = content.encode('utf-8', errors='replace')

        request = Request(method='POST', url=abs_url)
        request.set_header(name='Content-Type', value=content_type)
        request.set_header(name='Content-Length', value=str(len(content_encoded)))
        request.set_content(content_encoded)

    else:

        request = Request(method='GET', url=abs_url)

    log.debug(f"Sending Solr request: {request}")

    response = ua.request(request)

    if not response.is_success():
        error_message = __solr_error_message_from_response(response=response)
        raise McSolrRequestQueryErrorException(f"Error fetching Solr response: {error_message}")

    return response.decoded_content()
def source_url(self):
    url = furl(self.source_node.absolute_url)
    url.path.segments = self.source_node.web_url_for('collect_file_trees').split('/')
    return url.url
WSGI_APPLICATION = 'django_wyh.wsgi.application'

# Database
# https://docs.djangoproject.com/en/3.0/ref/settings/#databases
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
    }
}

DATABASES['default'].update(
    dj_database_url.config(
        env='DATABASE_URL',
        conn_max_age=env.int('DATABASE_CONN_MAX_AGE', 500),
        ssl_require='sslmode' not in furl(env('DATABASE_URL', '')).args,
    ))

# Password validation
# https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
def update(self):
    """
    Downloads the latest source tarball from github and installs it over the existing version.
    """
    base_url = furl(self.repositoryBase)
    base_url.path.add(self.repository)
    base_url.path.add("tarball")
    base_url.path.add(self.branch)
    tar_download_url = base_url.url
    main_dir = os.path.dirname(os.path.dirname(__file__))

    try:
        self.backup()

        # prepare the update dir
        update_dir = os.path.join(main_dir, 'update')

        if os.path.isdir(update_dir):
            logger.info("Clearing out update folder " + update_dir + " before extracting")
            shutil.rmtree(update_dir)

        logger.info("Creating update folder " + update_dir + " before extracting")
        os.makedirs(update_dir)

        # retrieve file
        logger.info("Downloading update from " + repr(tar_download_url))
        tar_download_path = os.path.join(update_dir, 'sb-update.tar')
        urllib.urlretrieve(tar_download_url, tar_download_path)

        if not os.path.isfile(tar_download_path):
            logger.error("Unable to retrieve new version from " + tar_download_url + ", can't update")
            return False

        if not tarfile.is_tarfile(tar_download_path):
            logger.error("Retrieved version from " + tar_download_url + " is corrupt, can't update")
            return False

        # extract to sb-update dir
        logger.info("Extracting update file " + tar_download_path)
        tar = tarfile.open(tar_download_path)
        tar.extractall(update_dir)
        tar.close()

        # delete .tar.gz
        logger.info("Deleting update file " + tar_download_path)
        os.remove(tar_download_path)

        # find update dir name
        update_dir_contents = [x for x in os.listdir(update_dir) if os.path.isdir(os.path.join(update_dir, x))]
        if len(update_dir_contents) != 1:
            logger.error("Invalid update data, update failed: " + str(update_dir_contents))
            return False
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        dontUpdateThese = ["nssm.exe"]  # ("msvcm90.dll", "msvcr90.dll", "msvcm90.dll")

        # rename exes, pyd and dll files so they can be overwritten
        filesToRename = []
        for filename in os.listdir(main_dir):
            if (filename.endswith(".pyd") or filename.endswith(".dll") or filename.endswith(".exe")) and filename not in dontUpdateThese:
                filesToRename.append((filename, filename + ".updated"))
        logger.info("Renaming %d files so they can be overwritten" % len(filesToRename))
        for toRename in filesToRename:
            logger.debug("Renaming %s to %s" % (toRename[0], toRename[1]))
            shutil.move(toRename[0], toRename[1])

        # walk temp folder and move files to main folder
        logger.info("Moving files from " + content_dir + " to " + main_dir)
        for dirname, dirnames, filenames in os.walk(content_dir):
            dirname = dirname[len(content_dir) + 1:]
            for curfile in filenames:
                if curfile not in dontUpdateThese:
                    old_path = os.path.join(content_dir, dirname, curfile)
                    new_path = os.path.join(main_dir, dirname, curfile)
                    logger.debug("Updating %s" % curfile)

                    if os.path.isfile(new_path):
                        os.remove(new_path)
                    os.renames(old_path, new_path)
                else:
                    logger.debug("Skipping %s" % curfile)
    except Exception as e:
        logger.error("Error while trying to update: " + str(e))
        return False

    logger.info("Update successful")
    return True
def parse(self, response):
    """ Required first level page parser.

    :param response: The response instance from ``start_requests``
    :type response: scrapy.Request
    :returns: Yields torrent items
    :rtype: list[items.Torrent]
    """
    soup = self.get_soup(response.text)
    try:
        results = soup\
            .find('table', {'id': 'searchResult'})\
            .find_all('tr')[1:]
    except AttributeError:
        return

    for result in results:
        torrent = items.Torrent(spider=self.name)
        torrent['categories'] = [
            self._category_map.get(
                furl.furl(category.attrs['href']).path.segments[-1],
                items.TorrentCategory.Unknown
            )
            for category in result.find(
                'td', {'class': 'vertTh'}
            ).find_all('a')
        ]
        torrent['magnet'] = result.find(
            'a', {'href': re.compile('^magnet\:.*')}
        )['href']
        torrent['hash'] = re.match(
            r'.*magnet:\?xt=urn:(?:btih)+:([a-zA-Z0-9]+).*',
            torrent['magnet']
        ).groups()[0].lower()
        (torrent['seeders'], torrent['leechers'],) = tuple([
            int(column.contents[0])
            for column in result.find_all('td', {'align': 'right'})
        ])

        result_links = result.find('a', {'class': 'detLink'})
        if 'href' in result_links.attrs:
            torrent['source'] = furl.furl(response.url).set(
                path=result_links.attrs['href'],
                args={}
            ).url
            torrent['name'] = result_links.contents[0].strip()

        result_desc = result.find('font', {'class': 'detDesc'})
        (time_content, size_content,) = \
            result_desc.contents[0].split(',')[:2]
        torrent['uploaded'] = self.parse_datetime(
            time_content.split(' ')[-1],
            formats=[
                '%m-%d %Y', '%m-%d %H:%M', '%H:%M', 'Y-day %H:%M'
            ]
        )
        torrent['size'] = self.parse_size(
            size_content.split(' ')[-1]
        )
        try:
            torrent['uploader'] = result_desc.find(
                'a', {'href': re.compile('^/user/.*')}
            ).contents[0]
        except AttributeError:
            pass

        yield torrent
def get_profile_url(self):
    url = furl.furl(self.BASE_URL)
    url.path.segments.extend(('oauth2', 'profile',))
    return url.url
def resolve(self, context=None, request=None, resolved_object=None):
    if not context and not request:
        raise ImproperlyConfigured(
            'Must provide a context or a request in order to resolve the '
            'link.'
        )

    AccessControlList = apps.get_model(app_label='acls', model_name='AccessControlList')

    if not context:
        context = RequestContext(request=request)

    if not request:
        # Try to get the request object the faster way and fallback to the
        # slower method.
        try:
            request = context.request
        except AttributeError:
            request = Variable('request').resolve(context)

    current_path = request.META['PATH_INFO']
    current_view_name = resolve(current_path).view_name

    # ACL is tested against the resolved_object, or just {{ object }} if not given
    if not resolved_object:
        try:
            resolved_object = Variable('object').resolve(context=context)
        except VariableDoesNotExist:
            pass

    # If this link has a required permission, check that the user has it too
    if self.permissions:
        if resolved_object:
            try:
                AccessControlList.objects.check_access(
                    obj=resolved_object, permissions=self.permissions,
                    user=request.user
                )
            except PermissionDenied:
                return None
        else:
            try:
                Permission.check_user_permissions(
                    permissions=self.permissions, user=request.user
                )
            except PermissionDenied:
                return None

    # Check to see if link has a conditional display function and only
    # display it if the result of the conditional display function is True
    if self.condition:
        if not self.condition(context):
            return None

    resolved_link = ResolvedLink(current_view_name=current_view_name, link=self)

    if self.view:
        view_name = Variable('"{}"'.format(self.view))
        if isinstance(self.args, list) or isinstance(self.args, tuple):
            # TODO: Don't check for instance; check for iterable in a try/except
            # block. This update required changing all 'args' arguments in
            # links.py files to be iterables and not just strings.
            args = [Variable(arg) for arg in self.args]
        else:
            args = [Variable(self.args)]

        # If we were passed an instance of the view context object we are
        # resolving, inject it into the context. This helps resolve links for
        # object lists.
        if resolved_object:
            context['resolved_object'] = resolved_object

        try:
            kwargs = self.kwargs(context)
        except TypeError:
            # Is not a callable
            kwargs = self.kwargs

        kwargs = {key: Variable(value) for key, value in kwargs.items()}

        # Use Django's exact {% url %} code to resolve the link
        node = URLNode(view_name=view_name, args=args, kwargs=kwargs, asvar=None)
        try:
            resolved_link.url = node.render(context)
        except Exception as exception:
            logger.error('Error resolving link "%s" URL; %s', self.text, exception)
    elif self.url:
        resolved_link.url = self.url

    # This is for links that should be displayed but that are not clickable
    if self.conditional_disable:
        resolved_link.disabled = self.conditional_disable(context)
    else:
        resolved_link.disabled = False

    # Let a new link keep the same URL query string as the current URL
    if self.keep_query:
        # Sometimes we are required to remove a key from the URL QS
        parsed_url = furl(
            force_str(
                request.get_full_path() or request.META.get(
                    'HTTP_REFERER', reverse(setting_home_view.value)
                )
            )
        )

        for key in self.remove_from_query:
            try:
                parsed_url.query.remove(key)
            except KeyError:
                pass

        # Use the link's URL but with the previous URL querystring
        new_url = furl(resolved_link.url)
        new_url.args = parsed_url.querystr
        resolved_link.url = new_url.url

    resolved_link.context = context
    return resolved_link
# Path analysis
import requests
from furl import furl
from PIL import Image
import hashlib

GROUP_START = 1
GROUP_END = 5

# 1. Requesting this URL shows that the page loads its results via XHR (AJAX) requests
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
f = furl('https://www.toutiao.com/search_content/?offset=90&format=json&keyword=%E8%A1%97%E6%8B%8D&autoload=true&count=20')


def get_page(offset, keyword):
    '''Fetch one result page.'''
    # build a new url
    f.args['offset'] = offset
    f.args['keyword'] = keyword
    try:
        respone = requests.get(f.url, headers=headers)
        # check the response status
        if respone.status_code == requests.codes.ok:
            return respone.json()
def run(arguments):
    arguments.config = arguments.config if os.path.isabs(arguments.config) else os.path.join(nzbhydra.getBasePath(), arguments.config)
    arguments.database = arguments.database if os.path.isabs(arguments.database) else os.path.join(nzbhydra.getBasePath(), arguments.database)
    nzbhydra.configFile = settings_file = arguments.config
    nzbhydra.databaseFile = database_file = arguments.database

    logger.notice("Loading settings from {}".format(settings_file))
    try:
        config.load(settings_file)
        config.save(settings_file)  # Write any new settings back to the file
        log.setup_custom_logger(arguments.logfile, arguments.quiet)
    except Exception:
        print("An error occurred while migrating the old config. Sorry about that...:")
        traceback.print_exc(file=sys.stdout)
        print("Trying to log messages from migration...")
        config.logLogMessages()
        os._exit(-5)

    try:
        logger.info("Started")

        if arguments.daemon:
            logger.info("Daemonizing...")
            daemonize(arguments.pidfile)

        config.logLogMessages()

        if arguments.clearloganddb:
            logger.warning("Deleting log file and database now as requested")
            try:
                logger.warning("Deleting database file %s" % database_file)
                os.unlink(database_file)
            except Exception as e:
                logger.error("Unable to close or delete log file: %s" % e)

            try:
                handler = logger.handlers[1] if len(logger.handlers) == 2 else logger.handlers[0]
                filename = handler.stream.name

                if filename and os.path.exists(filename):
                    logger.warn("Deleting file %s" % filename)
                    handler.flush()
                    handler.close()
                    logger.removeHandler(handler)
                    os.unlink(filename)
                    logger.addHandler(handler)
            except Exception as e:
                print("Unable to close or delete log file: %s" % e)

        try:
            import _sqlite3
            logger.debug("SQLite3 version: %s" % _sqlite3.sqlite_version)
        except:
            logger.error("Unable to log SQLite version")

        logger.info("Loading database file %s" % database_file)
        if not os.path.exists(database_file):
            database.init_db(database_file)
        else:
            database.update_db(database_file)
        logger.info("Starting db")

        indexers.read_indexers_from_config()

        if config.settings.main.debug:
            logger.info("Debug mode enabled")

        # Clean up any "old" files from last update
        oldfiles = glob.glob("*.updated")
        if len(oldfiles) > 0:
            logger.info("Deleting %d old files remaining from update" % len(oldfiles))
            for filename in oldfiles:
                try:
                    if "hydratray" not in filename:
                        logger.debug("Deleting %s" % filename)
                        os.remove(filename)
                    else:
                        logger.debug("Not deleting %s because it's still running. TrayHelper will restart itself" % filename)
                except Exception:
                    logger.warn("Unable to delete old file %s. Please delete manually" % filename)

        host = config.settings.main.host if arguments.host is None else arguments.host
        port = config.settings.main.port if arguments.port is None else arguments.port

        socksproxy = config.settings.main.socksProxy if arguments.socksproxy is None else arguments.socksproxy
        if socksproxy:
            webaccess.set_proxies(socksproxy)
        elif config.settings.main.httpProxy:
            webaccess.set_proxies(config.settings.main.httpProxy, config.settings.main.httpsProxy)

        logger.notice("Starting web app on %s:%d" % (host, port))
        if config.settings.main.externalUrl is not None and config.settings.main.externalUrl != "":
            f = furl(config.settings.main.externalUrl)
        else:
            f = furl()
            f.host = "127.0.0.1" if config.settings.main.host == "0.0.0.0" else config.settings.main.host
            f.port = port
            f.scheme = "https" if config.settings.main.ssl else "http"

        if not arguments.nobrowser and config.settings.main.startupBrowser:
            if arguments.restarted:
                logger.info("Not opening the browser after restart")
            else:
                logger.info("Opening browser to %s" % f.url)
                webbrowser.open_new(f.url)
        else:
            logger.notice("Go to %s for the frontend" % f.url)

        web.run(host, port, basepath)
    except Exception:
        logger.exception("Fatal error occurred")
def make_response_from_ticket(ticket, service_url):
    """
    Given a CAS ticket and service URL, attempt to validate the user and return a proper redirect response.

    :param str ticket: CAS service ticket
    :param str service_url: Service URL from which the authentication request originates
    :return: redirect response
    """
    service_furl = furl.furl(service_url)
    # `service_url` is guaranteed to be removed of `ticket` parameter, which has been pulled off in
    # `framework.sessions.before_request()`.
    if 'ticket' in service_furl.args:
        service_furl.args.pop('ticket')

    client = get_client()
    cas_resp = client.service_validate(ticket, service_furl.url)

    if cas_resp.authenticated:
        user, external_credential, action = get_user_from_cas_resp(cas_resp)
        # user found and authenticated
        if user and action == 'authenticate':
            # if we successfully authenticate and a verification key is present, invalidate it
            if user.verification_key:
                user.verification_key = None
                user.save()

            # if user is authenticated by external IDP, ask CAS to authenticate user for a second time
            # this extra step will guarantee that 2FA are enforced
            # current CAS session created by external login must be cleared first before authentication
            if external_credential:
                user.verification_key = generate_verification_key()
                user.save()
                return redirect(get_logout_url(get_login_url(
                    service_url,
                    username=user.username,
                    verification_key=user.verification_key
                )))

            # if user is authenticated by CAS
            # TODO [CAS-27]: Remove Access Token From Service Validation
            return authenticate(user, cas_resp.attributes.get('accessToken', ''), redirect(service_furl.url))

        # first time login from external identity provider
        if not user and external_credential and action == 'external_first_login':
            from website.util import web_url_for

            # orcid attributes can be marked private and not shared, default to orcid otherwise
            fullname = u'{} {}'.format(
                cas_resp.attributes.get('given-names', ''),
                cas_resp.attributes.get('family-name', '')
            ).strip()

            # TODO [CAS-27]: Remove Access Token From Service Validation
            user = {
                'external_id_provider': external_credential['provider'],
                'external_id': external_credential['id'],
                'fullname': fullname,
                'access_token': cas_resp.attributes.get('accessToken', ''),
                'service_url': service_furl.url,
            }
            return external_first_login_authenticate(user, redirect(web_url_for('external_login_email_get')))

    # Unauthorized: ticket could not be validated, or user does not exist.
    return redirect(service_furl.url)
def addon_view_or_download_file(auth, path, provider, **kwargs):
    extras = request.args.to_dict()
    extras.pop('_', None)  # Clean up our url params a bit
    action = extras.get('action', 'view')
    node = kwargs.get('node') or kwargs['project']

    node_addon = node.get_addon(provider)

    if not path:
        raise HTTPError(httplib.BAD_REQUEST)

    if not isinstance(node_addon, StorageAddonBase):
        raise HTTPError(httplib.BAD_REQUEST, {
            'message_short': 'Bad Request',
            'message_long': 'The add-on containing this file is no longer connected to the {}.'.format(node.project_or_component)
        })

    if not node_addon.has_auth:
        raise HTTPError(httplib.UNAUTHORIZED, {
            'message_short': 'Unauthorized',
            'message_long': 'The add-on containing this file is no longer authorized.'
        })

    if not node_addon.complete:
        raise HTTPError(httplib.BAD_REQUEST, {
            'message_short': 'Bad Request',
            'message_long': 'The add-on containing this file is no longer configured.'
        })

    file_node = FileNode.resolve_class(provider, FileNode.FILE).get_or_create(node, path)

    # Note: Cookie is provided for authentication to waterbutler;
    # it is overridden to force authentication as the current user.
    # The auth header is also passed to support basic auth.
    version = file_node.touch(
        request.headers.get('Authorization'),
        **dict(extras, cookie=request.cookies.get(settings.COOKIE_NAME))
    )

    if version is None:
        if file_node.get_guid():
            # If this file has been successfully viewed before but no longer exists,
            # move the file to a trashed file node
            if not TrashedFileNode.load(file_node._id):
                file_node.delete()
            # Show a nice error message
            return addon_deleted_file(file_node=file_node, **kwargs)

        raise HTTPError(httplib.NOT_FOUND, {
            'message_short': 'Not Found',
            'message_long': 'This file does not exist'
        })

    # TODO clean up these urls and unify what is used as a version identifier
    if request.method == 'HEAD':
        return make_response(('', 200, {
            'Location': file_node.generate_waterbutler_url(**dict(extras, direct=None, version=version.identifier))
        }))

    if action == 'download':
        return redirect(file_node.generate_waterbutler_url(**dict(extras, direct=None, version=version.identifier)))

    if len(request.path.strip('/').split('/')) > 1:
        guid = file_node.get_guid(create=True)
        return redirect(furl.furl('/{}/'.format(guid._id)).set(args=extras).url)

    return addon_view_file(auth, node, file_node, version)
def fetch_by_id(self, provider_id):
    url = furl(self.url)
    url.args['verb'] = 'GetRecord'
    url.args['metadataPrefix'] = self.metadata_prefix
    url.args['identifier'] = provider_id
    return etree.tostring(self.fetch_page(url)[0][0])
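Illustrative sketch of the OAI-PMH GetRecord URL built above; the endpoint, metadata prefix, and identifier are assumptions, not values from the original harvester:

from furl import furl

req = furl('https://repository.example.org/oai')
req.args['verb'] = 'GetRecord'
req.args['metadataPrefix'] = 'oai_dc'
req.args['identifier'] = 'oai:example.org:1234'
print(req.url)  # https://repository.example.org/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=... (encoded as needed)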
def get_application_revocation_url(self):
    url = furl.furl(self.BASE_URL)
    url.path.segments.extend(('oauth2', 'revoke'))
    return url.url
def something_in_stock_mass(self):
    for i in range(len(self.asin_list)):
        params = {}
        for x in range(len(self.asin_list[i])):
            params[f"ASIN.{x + 1}"] = self.asin_list[i][x]
            params[f"Quantity.{x + 1}"] = 1
        f = furl(AMAZON_URLS["CART_URL"])
        f.set(params)
        self.driver.get(f.url)
        title = self.driver.title
        bad_list_flag = False

        if title in DOGGO_TITLES:
            good_asin_list = []
            for asin in self.asin_list[i]:
                checkparams = {}
                checkparams[f"ASIN.1"] = asin
                checkparams[f"Quantity.1"] = 1
                check = furl(AMAZON_URLS["CART_URL"])
                check.set(checkparams)
                self.driver.get(check.url)
                sanity_check = self.driver.title
                if sanity_check in DOGGO_TITLES:
                    log.error(f"{asin} blocked from bulk adding by Amazon")
                else:
                    log.info(f"{asin} appears to allow adding")
                    good_asin_list.append(asin)
                time.sleep(1)

            if len(good_asin_list) > 0:
                log.info("Revising ASIN list to include only good ASINs listed above")
                self.asin_list[i] = good_asin_list
            else:
                log.error(f"No ASINs work in list {i + 1}.")
                self.asin_list[i] = self.asin_list[i][0]  # just assign one asin to list, can't remove during execution
                bad_list_flag = True

        if bad_list_flag:
            continue

        self.check_if_captcha(self.wait_for_pages, ADD_TO_CART_TITLES)
        price_element = self.driver.find_elements_by_xpath('//td[@class="price item-row"]')
        if price_element:
            price_flag = False
            price_warning_flag = False
            for price_e in price_element:
                str_price = price_e.text
                log.info(f"Item Cost: {str_price}")
                price = parse_price(str_price)
                priceFloat = price.amount
                if priceFloat is None:
                    log.error("Error reading price information on page.")
                elif priceFloat <= self.reserve[i]:
                    log.info("Item in stock and under reserve!")
                    price_flag = True
                else:
                    log.info("Item greater than reserve price")
                    price_warning_flag = True

            if price_flag:
                log.info("Attempting to purchase")
                if price_warning_flag:
                    log.info("Cart included items below and above reserve price, cancel unwanted items ASAP!")
                return i + 1
    return 0
def normalize_url(url: str) -> str:
    """Normalize URL

    * Fix common mistypes, e.g. "http://http://..."
    * Run URL through normalization, i.e. standardize URL's scheme and hostname case,
      remove default port, uppercase all escape sequences, un-escape octets that can be
      represented as plain characters, remove whitespace before / after the URL string
    * Remove #fragment
    * Remove various ad tracking query parameters, e.g. "utm_source", "utm_medium",
      "PHPSESSID", etc.

    Return normalized URL on success; raise on error"""
    url = decode_object_from_bytes_if_needed(url)
    if url is None:
        raise McNormalizeURLException("URL is None")
    if len(url) == 0:
        raise McNormalizeURLException("URL is empty")

    log.debug("normalize_url: " + url)

    url = fix_common_url_mistakes(url)

    try:
        url = canonical_url(url)
    except Exception as ex:
        raise McNormalizeURLException("Unable to get canonical URL: %s" % str(ex))

    if not is_http_url(url):
        raise McNormalizeURLException("URL is not HTTP(s): %s" % url)

    uri = furl(url)

    # Remove #fragment
    uri.fragment.set(path='')

    parameters_to_remove = []

    # Facebook parameters (https://developers.facebook.com/docs/games/canvas/referral-tracking)
    parameters_to_remove += [
        'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref',
        'action_object_map', 'action_type_map', 'action_ref_map', 'fsrc_fb_noscript',
    ]

    # metrika.yandex.ru parameters
    parameters_to_remove += ['yclid', '_openstat']

    if 'facebook.com' in uri.host.lower():
        # Additional parameters specifically for the facebook.com host
        parameters_to_remove += ['ref', 'fref', 'hc_location']

    if 'nytimes.com' in uri.host.lower():
        # Additional parameters specifically for the nytimes.com host
        parameters_to_remove += [
            'emc', 'partner', '_r', 'hp', 'inline', 'smid', 'WT.z_sma',
            'bicmp', 'bicmlukp', 'bicmst', 'bicmet', 'abt', 'abg',
        ]

    if 'livejournal.com' in uri.host.lower():
        # Additional parameters specifically for the livejournal.com host
        parameters_to_remove += ['thread', 'nojs']

    if 'google.' in uri.host.lower():
        # Additional parameters specifically for the google.[com,lt,...] host
        parameters_to_remove += ['gws_rd', 'ei']

    # Some other parameters (common for tracking session IDs, advertising, etc.)
    parameters_to_remove += [
        'PHPSESSID', 'PHPSESSIONID', 'cid', 's_cid', 'sid', 'ncid', 'ir',
        'ref', 'oref', 'eref', 'ns_mchannel', 'ns_campaign', 'ITO', 'wprss',
        'custom_click', 'source', 'feedName', 'feedType', 'skipmobile',
        'skip_mobile', 'altcast_code',
    ]

    # Make the sorting default (e.g. on Reddit)
    # Some other parameters (common for tracking session IDs, advertising, etc.)
    parameters_to_remove += ['sort']

    # Some Australian websites append the "nk" parameter with a tracking hash
    if 'nk' in uri.query.params:
        for nk_value in uri.query.params['nk']:
            if re.search(r'^[0-9a-fA-F]+$', nk_value, re.I):
                parameters_to_remove += ['nk']
                break

    # Delete the "empty" parameter (e.g. in http://www-nc.nytimes.com/2011/06/29/us/politics/29marriage.html?=_r%3D6)
    parameters_to_remove += ['']

    # Remove cruft parameters
    for parameter in parameters_to_remove:
        if ' ' in parameter:
            log.warning('Invalid cruft parameter "%s"' % parameter)
        uri.query.params.pop(parameter, None)

    for name in list(uri.query.params.keys()):  # copy of list to be able to delete
        # Remove parameters that start with '_' (e.g. '_cid') because they're
        # more likely to be the tracking codes
        if name.startswith('_'):
            uri.query.params.pop(name, None)

        # Remove GA parameters, current and future (e.g. "utm_source",
        # "utm_medium", "ga_source", "ga_medium")
        # (https://support.google.com/analytics/answer/1033867?hl=en)
        if name.startswith('ga_') or name.startswith('utm_'):
            uri.query.params.pop(name, None)

    url = uri.url

    # Remove empty values in query string, e.g. http://bash.org/?244321=
    url = url.replace('=&', '&')
    url = re.sub(r'=$', '', url)

    return url
# grab the images
img = soup.find_all("img")

# collect the image urls
imgs = []
for item in img:
    imgs.append(item.get('data-src'))

# download the images
# idea: save all images into a local folder named after the article title,
# naming each file by its index plus the format suffix taken from the image url
dir_name = soup.select('h2')[0].string  # folder name
dir_name = dir_name.replace("\n", "")
dir_name = dir_name.strip()
print('dir_name', dir_name)
if not os.path.exists(str(dir_name)):
    # check for the folder and create it with the os module
    os.mkdir(dir_name)

# save the images
picture_name = 0
for img_url in imgs:
    time.sleep(1)  # wait between requests so we don't hammer the site
    if img_url:
        f = furl(img_url)
        reponse = requests.get(img_url)  # fetch each image as we iterate
        with open(dir_name + '/' + str(picture_name) + str('.') + str(f.args['wx_fmt']), 'wb') as f:
            f.write(reponse.content)
        picture_name += 1
def get_base_url(self, url):
    url = furl.furl(url)
    return '{}://{}'.format(url.scheme, url.host)
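A quick sketch of what get_base_url returns; the example URL is an assumption. Note that any explicit port is dropped because only scheme and host are kept:

import furl

u = furl.furl('https://example.com:8080/path/to/thing?q=1')
print('{}://{}'.format(u.scheme, u.host))  # https://example.com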
async def test_conductor(conductor, simpleWebServer):
    serverSocketPath, c = conductor
    socketPath, runner = simpleWebServer
    user = getpass.getuser()
    key = 'foobar'
    auth = '123'

    conn = aiohttp.UnixConnector(path=serverSocketPath)
    async with aiohttp.ClientSession(connector=conn) as session:
        async with session.get(f'http://{key}-{user}.conductor.local/_conductor/status') as resp:
            assert resp.status == 200
            o = await resp.json()
            assert o['routesTotal'] == 0
            assert o['requestTotal'] == 1
            assert o['requestActive'] == 1
            assert o['noroute'] == 1

        # make sure that requests are properly counted and requestActive is decreased
        reader, writer = await asyncio.open_unix_connection(path=serverSocketPath)
        writer.write(b'invalid http request\n')
        writer.close()
        await writer.wait_closed()

        async with session.get(f'http://{key}-{user}.conductor.local/_conductor/status') as resp:
            assert resp.status == 200
            o = await resp.json()
            assert o['requestTotal'] == 3
            assert o['requestActive'] == 1

        async with session.get(f'http://{key}-{user}.conductor.local/_conductor/nonexistent') as resp:
            assert resp.status == 404

        async with session.get(f'http://{key}-{user}.conductor.local/') as resp:
            assert resp.status == 404

        routeKey = RouteKey(key=key, user=user)
        route = Route(key=routeKey, auth=c.hashKey(auth), socket=socketPath)
        c.addRoute(route)

        for u in (f'http://nonexistent-{user}.conductor.local', 'http://invalidpattern.conductor.local', 'http://different.domain'):
            async with session.get(u) as resp:
                assert resp.status == 404

        async with session.get(f'http://{key}-{user}.conductor.local') as resp:
            assert resp.status == 403

        # add unrelated cookie
        session.cookie_jar.update_cookies({'unrelated': 'value'})
        async with session.get(f'http://{key}-{user}.conductor.local') as resp:
            assert resp.status == 403

        async with session.get(f'http://{key}-{user}.conductor.local/_conductor/auth/{auth}') as resp:
            assert resp.status == 200
            assert await resp.text() == 'Hello, world'

        # make sure responses larger than any buffer work
        async with session.get(f'http://{key}-{user}.conductor.local/large') as resp:
            assert resp.status == 200
            assert await resp.text() == 'a' * (1024 * 1024)

        async with session.get(f'http://{key}-{user}.conductor.local/_conductor/auth/{auth}?next=/nonexistent') as resp:
            assert resp.status == 404
            assert furl(resp.url).path == '/nonexistent'

        # big request
        headers = dict([(f'key-{i}', 'val') for i in range(101)])
        async with session.get(f'http://{key}-{user}.conductor.local/', headers=headers) as resp:
            assert resp.status == 400

        # destroy application
        await runner.cleanup()

        async with session.get(f'http://{key}-{user}.conductor.local/') as resp:
            assert resp.status == 502

        c.deleteRoute(routeKey)
        async with session.get(f'http://{key}-{user}.conductor.local/') as resp:
            assert resp.status == 404
def fetch_xml(self, file_name):
    file_url = furl(self.BASE_DATA_URL.format(file_name))
    # Not using self.requests when getting the file contents because the eLife rate limit (1, 60) does not apply
    resp = requests.get(file_url.url)
    xml = etree.XML(resp.content)
    return xml