def sess(self, url, tmpdir):
    self.url = url
    self.cache = FileCache(str(tmpdir))
    sess = CacheControl(requests.Session(), cache=self.cache)
    yield sess

    # closing session object
    sess.close()
def __init__(
    self,
    fetcher=None,       # type: Optional[Fetcher]
    namespaces=None,    # type: Optional[Dict[Text, Text]]
    fileuri=None,       # type: Optional[Text]
    copyfrom=None,      # type: Optional[LoadingOptions]
    schemas=None,       # type: Optional[List[Text]]
    original_doc=None,  # type: Optional[Any]
):  # type: (...) -> None
    self.idx = {}  # type: Dict[Text, Text]
    self.fileuri = fileuri  # type: Optional[Text]
    self.namespaces = namespaces
    self.schemas = schemas
    self.original_doc = original_doc
    if copyfrom is not None:
        self.idx = copyfrom.idx
        if fetcher is None:
            fetcher = copyfrom.fetcher
        if fileuri is None:
            self.fileuri = copyfrom.fileuri
        if namespaces is None:
            self.namespaces = copyfrom.namespaces
        if schemas is None:
            self.schemas = copyfrom.schemas

    if fetcher is None:
        import requests
        from cachecontrol.wrapper import CacheControl
        from cachecontrol.caches import FileCache
        from schema_salad.ref_resolver import DefaultFetcher
        if "HOME" in os.environ:
            session = CacheControl(
                requests.Session(),
                cache=FileCache(
                    os.path.join(os.environ["HOME"], ".cache", "salad")),
            )
        elif "TMPDIR" in os.environ:
            session = CacheControl(
                requests.Session(),
                cache=FileCache(
                    os.path.join(os.environ["TMPDIR"], ".cache", "salad")),
            )
        else:
            session = CacheControl(
                requests.Session(),
                cache=FileCache(os.path.join("/tmp", ".cache", "salad")))
        self.fetcher = DefaultFetcher({}, session)  # type: Fetcher
    else:
        self.fetcher = fetcher

    self.vocab = _vocab
    self.rvocab = _rvocab

    if namespaces is not None:
        self.vocab = self.vocab.copy()
        self.rvocab = self.rvocab.copy()
        for k, v in iteritems(namespaces):
            self.vocab[k] = v
            self.rvocab[v] = k
def test_simple_lockfile_arg(self, tmpdir, value, expected):
    if value is not None:
        cache = FileCache(str(tmpdir), use_dir_lock=value)
    else:
        cache = FileCache(str(tmpdir))
    assert issubclass(cache.lock_class, expected)
class TestStorageFileCache(object):

    @pytest.fixture()
    def sess(self, server):
        self.url = server.application_url
        self.cache = FileCache(STORAGE_FOLDER)
        sess = CacheControl(requests.Session(), cache=self.cache)
        return sess

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_key_length(self, sess):
        """
        Hash table keys: most file systems have a 255-character path
        limitation.
            * Make sure the hash method does not produce overly long keys
            * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = 'http://example.org/res?a=1'
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))
def test_simple_lockfile_arg(self, tmpdir, value, expected):
    if value is not None:
        cache = FileCache(str(tmpdir), use_dir_lock=value)
    else:
        cache = FileCache(str(tmpdir))
    assert issubclass(cache.lock_class, expected)
    cache.close()
def __init__(self, fetcher=None, namespaces=None, fileuri=None, copyfrom=None, schemas=None):
    if copyfrom is not None:
        self.idx = copyfrom.idx
        if fetcher is None:
            fetcher = copyfrom.fetcher
        if fileuri is None:
            fileuri = copyfrom.fileuri
        if namespaces is None:
            namespaces = copyfrom.namespaces
        if schemas is None:
            schemas = copyfrom.schemas
    else:
        self.idx = {}

    if fetcher is None:
        import os
        import requests
        from cachecontrol.wrapper import CacheControl
        from cachecontrol.caches import FileCache
        from schema_salad.ref_resolver import DefaultFetcher
        if "HOME" in os.environ:
            session = CacheControl(
                requests.Session(),
                cache=FileCache(
                    os.path.join(os.environ["HOME"], ".cache", "salad")))
        elif "TMPDIR" in os.environ:
            session = CacheControl(
                requests.Session(),
                cache=FileCache(
                    os.path.join(os.environ["TMPDIR"], ".cache", "salad")))
        else:
            session = CacheControl(
                requests.Session(),
                cache=FileCache(os.path.join("/tmp", ".cache", "salad")))
        self.fetcher = DefaultFetcher({}, session)
    else:
        self.fetcher = fetcher

    self.fileuri = fileuri
    self.vocab = _vocab
    self.rvocab = _rvocab
    self.namespaces = namespaces
    self.schemas = schemas

    if namespaces is not None:
        self.vocab = self.vocab.copy()
        self.rvocab = self.rvocab.copy()
        for k, v in iteritems(namespaces):
            self.vocab[k] = v
            self.rvocab[v] = k
def __init__(self, directory, forever=False, filemode=0o0600,
             dirmode=0o0700, max_bytes=ONE_GIGABYTE, logger=warnings):
    FileCache.__init__(self, directory, forever, filemode, dirmode)
    self.max_bytes = max_bytes
    self.curr_bytes = 0
    self.logger = logger
def set(self, key, value):
    new_bytes = sys.getsizeof(value)
    total = self.curr_bytes + new_bytes
    if total > self.max_bytes:
        message = ("Tried adding %d bytes but %d bytes are currently saved"
                   " in the cache and the max_bytes is set to %d.\n"
                   % (new_bytes, self.curr_bytes, self.max_bytes))
        self.logger.warn(message)
        return
    FileCache.set(self, key, value)
    self.curr_bytes += new_bytes
def set(self, key, value):
    new_bytes = sys.getsizeof(value)
    total = self.curr_bytes + new_bytes
    if total > self.max_bytes:
        message = ("Tried adding %d bytes but %d bytes are currently saved"
                   " in the cache and the max_bytes is set to %d."
                   % (new_bytes, self.curr_bytes, self.max_bytes))
        self.logger.warning(message)
        return
    FileCache.set(self, key, value)
    self.curr_bytes += new_bytes
def __init__(
        self,
        user_auth,
        language="en-gb",
        user_object=None,
        kirkes_base_url="https://kirkes.finna.fi",
        kirkes_sessioncheck_url="/AJAX/JSON?method=getUserTransactions"):
    self.user_auth = user_auth
    self.language = language
    self.baseUrl = kirkes_base_url
    self.user_object = user_object
    self.sessionCheck_path = kirkes_sessioncheck_url
    self.sessionHttp = requests.Session()
    self.cached_sesssionHttp = CacheControl(
        self.sessionHttp,
        cache=FileCache(os.path.join(settings.BASE_DIR, '.webcache')))
    cookie_obj = requests.cookies.create_cookie(
        domain=self.getBaseURLDomainName(),
        name='language',
        value=self.language)
    self.sessionHttp.cookies.set_cookie(cookie_obj)
    self.cached_sesssionHttp.cookies.set_cookie(cookie_obj)
    retry = Retry(connect=5, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    self.cached_sesssionHttp.mount('http://', adapter)
    self.sessionHttp.mount('https://', adapter)
    if settings.USE_PROXY:
        get_tor_session(self.sessionHttp)
        get_tor_session(self.cached_sesssionHttp)
class Settings:
    do_update_wikidata = True

    # Don't activate this, it's most likely broken
    do_update_wikipedia = False

    sparql_file = "free_software_items.rq"
    oauth_token_file = "github_oauth_token.txt"

    # pywikibot is too stupid to cache the calendar model, so let's do this manually
    calendarmodel = pywikibot.Site().data_repository().calendarmodel()
    wikidata_repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    repo_regex = re.compile(r"https://github.com/[^/]+/[^/]+")
    version_regex = re.compile(r"\d+(\.\d+)+")
    unmarked_prerelease_regex = re.compile(
        r"[ -._\d](b|r|rc|beta|alpha)([ .\d].*)?$", re.IGNORECASE)

    cached_session = CacheControl(
        requests.Session(),
        cache=FileCache('cache', forever=True),
        heuristic=LastModified())

    properties = {
        "software version": "P348",
        "publication date": "P577",
        "retrieved": "P813",
        "reference URL": "P854",
        "official website": "P856",
        "source code repository": "P1324",
    }
def __init__(self, api_key=None, locale=None, anonymize=False,
             exclude_episodes=False, user_agent=None, cache=None,
             proxy_uri=None, verify_ssl=True):
    self.api_key = api_key or SHA1_KEY
    self.timestamp = time.mktime(datetime.date.today().timetuple())
    self.user_agent = user_agent or random.choice(USER_AGENTS)
    self.locale = locale or 'en_US'
    self.exclude_episodes = exclude_episodes
    self.caching_enabled = True if cache is True else False
    self.proxy_uri = proxy_uri or DEFAULT_PROXY_URI
    self.anonymize = anonymize
    self.verify_ssl = verify_ssl
    self.session = requests

    if self.caching_enabled:
        warnings.warn('caching will be removed in version 5.0.0 '
                      'due to not being thread safe')
        self.session = CacheControl(
            requests.Session(), cache=FileCache('.imdbpie_cache'))
class Settings:
    do_update_wikidata = True

    # Don't activate this, it's most likely broken
    do_update_wikipedia = False

    normalize_url = True
    sparql_file = "free_software_items.rq"

    # pywikibot is too stupid to cache the calendar model, so let's do this manually
    calendarmodel = pywikibot.Site().data_repository().calendarmodel()
    wikidata_repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    repo_regex = re.compile(r"^[a-z]+://github.com/[^/]+/[^/]+/?$")

    cached_session = CacheControl(
        requests.Session(),
        cache=FileCache("cache", forever=True),
        heuristic=LastModified(),
    )

    properties = {
        "software version": "P348",
        "publication date": "P577",
        "retrieved": "P813",
        "reference URL": "P854",
        "official website": "P856",
        "source code repository": "P1324",
        "title": "P1476",
        "protocol": "P2700",
    }
def main(argv):
    parser = argparse.ArgumentParser(description='Create or update cfn resource schema')
    parser.add_argument('--update', action='store_true')
    parser.add_argument('--type', metavar='TYPE',
                        help='Restrict parsing resource type properties only to'
                             ' type TYPE. Example: --type AWS::ApiGateway::RestApi')
    parser.add_argument('dest', nargs='?',
                        help='Write resulting schema into FILE'
                             ' instead of just printing it')
    args = parser.parse_args(argv[1:])

    sess = CacheControl(requests.Session(), cache=FileCache('.web_cache'))
    requests.get = sess.get

    stage1 = 'resource-stage1.json'
    if args.update:
        if not args.dest:
            print >> sys.stderr, ('Error: if --update is given, `dest` must be'
                                  ' specified too')
            return 2
        stage1_schema = tools.load(stage1)
        resource_schema = tools.load(args.dest)
        resource_schema['definitions']['resource_template'] = \
            stage1_schema['definitions']['resource_template']
    else:
        resource_schema = tools.load(stage1)

    resource_type_names = tools.get_all_resource_type_names()
    tools.update_all_resource_patterns_by_name(
        resource_schema,
        resource_type_names
    )
    if args.type:
        resource_type_names = [args.type]
    for resource_type_name in resource_type_names:
        print >> sys.stderr, resource_type_name
        resource_properties.set_resource_properties(resource_schema, resource_type_name)

    del resource_schema['definitions']['resource_template']

    all_properties = resource_properties.all_res_properties()
    resource_schema['definitions']['property_types'] = all_properties
    for rpt_name, rpt_schema in all_properties.items():
        print >> sys.stderr, rpt_name
        resource_properties.set_resource_property_type_properties(
            resource_schema,
            rpt_name
        )
    tweak_resource_schema.apply_all_tweaks(resource_schema)

    if args.dest:
        tools.write(resource_schema, args.dest)
    else:
        print tools.print_(resource_schema)
    return 0
def open(self):
    global SESSION
    if SESSION is None:
        SESSION = CacheControl(Session(), cache=FileCache(SESSION_CACHE_PATH))
    try:
        self._response = SESSION.get(self.uri, headers=self.headers)
    except InvalidSchema as e:
        raise DocumentNotFoundException(
            u'document not found: "{0}"'.format(self.uri), cause=e)
    except ConnectionError as e:
        raise LoaderException(
            u'request connection error: "{0}"'.format(self.uri), cause=e)
    except Exception as e:
        raise LoaderException(
            u'request error: "{0}"'.format(self.uri), cause=e)
    status = self._response.status_code
    if status == 404:
        self._response = None
        raise DocumentNotFoundException(
            u'document not found: "{0}"'.format(self.uri))
    elif status != 200:
        self._response = None
        raise LoaderException(
            u'request error {0:d}: "{1}"'.format(status, self.uri))
def fetch(self):
    feed = None
    if InformantConfig().get_argv_use_cache():
        cachefile = InformantConfig().get_cachefile()
        os.umask(0o0002)  # unrestrict umask so we can cache with proper permissions
        try:
            session = CacheControl(requests.Session(),
                                   cache=FileCache(cachefile,
                                                   filemode=0o0664,
                                                   dirmode=0o0775))
            feed = feedparser.parse(session.get(self.url).content)
        except Exception as e:
            ui.err_print('Unable to read cache information: {}'.format(e))
            feed = feedparser.parse(self.url)
    else:
        feed = feedparser.parse(self.url)

    if feed.bozo:
        ui.err_print('Encountered feed error: {}'.format(feed.bozo_exception))
        sys.exit(255)
    else:
        return feed
def requests_session(nocache=False):
    if nocache:
        return requests.Session()
    return CacheControl(
        requests.Session(),
        cache=FileCache(CACHE_FILENAME)
    )
def get_events_from_icalendars():
    global now, midnight
    now = localtz.localize(datetime.datetime.now())
    midnight = localtz.localize(datetime.datetime.combine(now, datetime.time(0, 0, 0)))
    cz = Calzone()
    session = FuturesSession()
    session.mount('https://',
                  CacheControlAdapter(cache=FileCache('.webcache'),
                                      heuristic=ForceCacheHeuristic()))
    cals = {k: session.get(u) for k, u in calendars.items()}
    concurrent.futures.wait(cals.values())
    for k, req in cals.items():
        try:
            cz.load(req.result().text)
        except Exception as err:
            print("Failed to load calendar '{}'".format(k))
            print(err)
    try:
        events = cz.get_events(midnight, midnight + datetime.timedelta(days=90))
    except Exception as e:
        print(e)
    events.sort(key=lambda e: e.start)
    return events
def cli(ctx, url, token):
    spinner = Halo(text="Login and fetch forks", spinner="dots")
    spinner.start()
    if token:
        gh = github3.login(token=token)
    else:
        user = click.prompt("username", hide_input=False, confirmation_prompt=False)
        password = click.prompt("Password", hide_input=True, confirmation_prompt=True)
        gh = github3.login(user, password=password)
    cachecontrol.CacheControl(gh.session,
                              cache=FileCache(".fork_work_cache"),
                              heuristic=OneDayHeuristic())
    login, repo = urlparse(url).path[1:].split("/")
    repository = gh.repository(login, repo)
    forks = repository.forks()
    spinner.stop()
    RepoCtx = namedtuple("Repo", ["repository", "forks", "gh"])
    ctx.obj = RepoCtx(repo, forks, gh)
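The OneDayHeuristic passed to CacheControl above is project-specific and not shown in this listing. A minimal sketch of what such a heuristic typically looks like, modeled on the BaseHeuristic example in the cachecontrol documentation; the class body below is an assumption for illustration, not the project's actual code:

import calendar
from datetime import datetime, timedelta
from email.utils import formatdate, parsedate

from cachecontrol.heuristics import BaseHeuristic


class OneDayHeuristic(BaseHeuristic):
    """Assumed behavior: treat every response as cacheable for one day."""

    def update_headers(self, response):
        # Base the expiry on the server-supplied Date header.
        date = parsedate(response.headers['date'])
        expires = datetime(*date[:6]) + timedelta(days=1)
        return {
            'expires': formatdate(calendar.timegm(expires.timetuple())),
            'cache-control': 'public',
        }

    def warning(self, response):
        return '110 - "Automatically cached; response may be stale."'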
def test_file_cache_recognizes_consumed_file_handle(self, url):
    s = CacheControl(Session(), FileCache("web_cache"))
    the_url = url + "cache_60"
    s.get(the_url)
    r = s.get(the_url)
    assert r.from_cache
    s.close()
class Settings:
    do_update_wikidata = True

    # Read also tags if a project doesn't use githubs releases
    read_tags = True

    normalize_repo_url = True
    blacklist_page = "User:Github-wiki-bot/Exceptions"
    whitelist_page = "User:Github-wiki-bot/Whitelist"
    blacklist: List[str] = []
    whitelist: List[str] = []
    sparql_file = "free_software_items.rq"
    license_sparql_file = "free_licenses.rq"
    licenses: Dict[str, str] = {}

    # pywikibot is too stupid to cache the calendar model, so let's do this manually
    calendarmodel = pywikibot.Site().data_repository().calendarmodel()
    wikidata_repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    repo_regex = re.compile(r"^[a-z]+://github.com/[^/]+/[^/]+/?$")

    cached_session: requests.Session = CacheControl(
        requests.Session(), cache=FileCache("cache"))
def main(argv):
    sess = CacheControl(requests.Session(), cache=FileCache('.web_cache'))
    requests.get = sess.get
    schema = tools.load('schema.json')
    schema['definitions']['Parameter']['properties'] = parse_parameters()
    tools.write(schema, 'schema.json')
def get_reader(self):
    sess = CacheControl(requests.Session(), cache=FileCache(gettempdir()))
    req = sess.get(self.file)
    # if the response is not 200, an exception will be raised
    req.raise_for_status()
    return io.BufferedReader(io.BytesIO(req.content))
def get_cached_session(caching=True):
    if not caching:
        return requests.Session()
    CACHE_DIR = 'web_cache'
    return CacheControl(requests.Session(),
                        cache=FileCache(CACHE_DIR),
                        heuristic=LastModifiedNoDate(require_date=False))
def session(self):
    if self._session is None:
        self._session = real_requests.Session()
        if CacheControlAdapter:
            adapter = CacheControlAdapter(cache=FileCache(".webcache"))
            self._session.mount("http://", adapter)
            self._session.mount("https://", adapter)
            print("Caching to .webcache")
    return self._session
def __init__(self):
    session = requests.Session()
    self.rootURL = "https://api.spiget.org/v2/"
    session.headers['User-Agent'] = "{} v{}".format(metadata.NAME, metadata.VERSION)
    self.session = CacheControl(
        session,
        cache=FileCache('.spl/cache')
    )
def get_cached_session(caching=True):
    if not caching:
        return requests.Session()
    # For some reason, in concurrent environments CacheControl works quite badly.
    return requests.Session()
    # Unreachable: the cached session below is intentionally disabled.
    CACHE_DIR = 'web_cache'
    return CacheControl(requests.Session(),
                        cache=FileCache(CACHE_DIR),
                        heuristic=_LastModifiedNoDate(require_date=False))
def __init__(self, headers=None, cookies=None, cache_name=None, delay=1,
             expire_hours=12, as_string=False):
    '''
    Base class for common scraping tasks

    Args:
        headers: dict of headers
        cookies: cookiejar object
        cache_name: should be full path
        delay: int (be polite!!!)
        expire_hours: int - default 12
        as_string: get string rather than parsed json
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())

    if not cookies:
        try:
            import cookielib
            cookies = cookielib.MozillaCookieJar()
        except (NameError, ImportError) as e:
            try:
                import http.cookiejar
                cookies = http.cookiejar.MozillaCookieJar()
            except Exception as e:
                pass

    _s = requests.Session()
    _s.cookies = cookies

    if headers:
        _s.headers.update(headers)
    else:
        _s.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
                                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                                         'Chrome/55.0.2883.87 Safari/537.36'})

    if cache_name:
        if '/' not in cache_name:
            cache_name = os.path.join('/tmp', cache_name)
        try:
            from cachecontrol import CacheControlAdapter
            from cachecontrol.heuristics import ExpiresAfter
            from cachecontrol.caches import FileCache
            _s.mount('http://', CacheControlAdapter(
                cache=FileCache(cache_name),
                cache_etags=False,
                heuristic=ExpiresAfter(hours=expire_hours)))
        except ImportError as e:
            try:
                import requests_cache
                requests_cache.install_cache(cache_name)
            except Exception:
                pass

    self.s = _s
    self.urls = []
    self.as_string = as_string

    if delay > 0:
        self.delay = delay
    else:
        self.delay = None
def __init__(self, destination, staging, s3_url, dry_run, cache):
    self.destination = destination
    self.staging = staging
    self.s3_url = s3_url
    self.dry_run = dry_run
    if cache:
        self.info(f"Using cache {cache}")
        self.fetcher = CacheControl(requests.session(), cache=FileCache(cache))
    else:
        self.info("Making uncached requests")
        self.fetcher = requests
def __init__(self, ctx, schemagraph=None, foreign_properties=None,
             idx=None, cache=None, session=None):
    # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None
    normalize = lambda url: urlparse.urlsplit(url).geturl()
    if idx is not None:
        self.idx = idx
    else:
        self.idx = NormDict(normalize)

    self.ctx = {}  # type: Loader.ContextType
    if schemagraph is not None:
        self.graph = schemagraph
    else:
        self.graph = rdflib.graph.Graph()

    if foreign_properties is not None:
        self.foreign_properties = foreign_properties
    else:
        self.foreign_properties = set()

    if cache is not None:
        self.cache = cache
    else:
        self.cache = {}

    self.session = None  # type: requests.sessions.Session
    if session is not None:
        self.session = session
    else:
        self.session = CacheControl(
            requests.Session(),
            cache=FileCache(
                os.path.join(os.environ["HOME"], ".cache", "salad")))

    self.url_fields = None  # type: Set[unicode]
    self.scoped_ref_fields = None  # type: Dict[unicode, int]
    self.vocab_fields = None  # type: Set[unicode]
    self.identifiers = None  # type: Set[unicode]
    self.identity_links = None  # type: Set[unicode]
    self.standalone = None  # type: Set[unicode]
    self.nolinkcheck = None  # type: Set[unicode]
    self.vocab = {}  # type: Dict[unicode, unicode]
    self.rvocab = {}  # type: Dict[unicode, unicode]
    self.idmap = None  # type: Dict[unicode, Any]
    self.mapPredicate = None  # type: Dict[unicode, unicode]
    self.type_dsl_fields = None  # type: Set[unicode]

    self.add_context(ctx)
def amalgama_lyrics(artist, song):
    url = amalgama.get_url(artist, song)
    try:
        cached_sess = CacheControl(sess, cache=FileCache('.amalgama'))
        response = cached_sess.get(url)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print(f'{artist}-{song} not found in amalgama {url}')
        return None
    text = amalgama.get_html(response.text)
    return text
def get_session():
    CACHE_FOLDER.mkdir(exist_ok=True)
    cache = FileCache(str(CACHE_FOLDER), forever=True)

    # quick smoke test that the cache directory is usable
    cache.set("foo", b"bar")
    assert cache.get("foo") == b"bar"

    session = RateLimitingSession()
    # session.headers.update({"x-api-key": "something-something-darkside"})
    session.mount(
        "https://www.metlink.org.nz/",
        CacheControlAdapter(heuristic=BetterExpiresAfter(days=7), cache=cache),
    )
    session.mount(
        METLINK_API_URL_PREFIX,
        CacheControlAdapter(heuristic=BetterExpiresAfter(days=1), cache=cache),
    )
    session.mount(
        METLINK_API_URL_PREFIX + "ServiceLocation/",
        CacheControlAdapter(heuristic=BetterExpiresAfter(seconds=90), cache=cache),
    )
    return session
def __init__(self, destination: Path, staging: Path, s3_url: str,
             dry_run: bool, is_nightly_enabled: bool, cache: Optional[Path]):
    self.destination = destination
    self.staging = staging
    self.s3_url = s3_url
    self.dry_run = dry_run
    self.is_nightly_enabled = is_nightly_enabled
    if cache:
        self.info(f"Using cache {cache}")
        self.fetcher = CacheControl(requests.session(), cache=FileCache(cache))
    else:
        self.info("Making uncached requests")
        self.fetcher = requests
def test_max_bytes(self, tmpdir, sess):
    """
    Test that caches the first url but not the second because the
    maximum bytes have been reached for the cache.
    """
    # use a cache with max_bytes set
    max_bytes = 1400
    self.cache = FileCache(str(tmpdir), max_bytes=max_bytes)
    sess = CacheControl(requests.Session(), cache=self.cache)

    url1 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
    url2 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
    assert url1 != url2

    # fill up the cache with url1
    response = sess.get(url1)
    assert not response.from_cache

    # make sure it got into the cache
    response = sess.get(url1)
    assert response.from_cache

    # do url2 now
    response = sess.get(url2)
    assert not response.from_cache

    # make sure url2 was NOT cached
    response = sess.get(url2)
    assert not response.from_cache

    # clear the cache
    response = sess.delete(url1)
    assert not response.from_cache

    # re-add to cache since bytes should be back to 0
    response = sess.get(url1)
    assert not response.from_cache

    # verify from cache again
    response = sess.get(url1)
    assert response.from_cache
class TestStorageFileCache(object):

    @pytest.fixture()
    def sess(self, url, tmpdir):
        self.url = url
        self.cache = FileCache(str(tmpdir))
        sess = CacheControl(requests.Session(), cache=self.cache)
        yield sess

        # closing session object
        sess.close()

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_filecache_directory_not_exists(self, tmpdir, sess):
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir doesn't exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        assert not os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        # Now our cache dir does exist
        assert os.path.exists(tmp_cache)

    def test_filecache_directory_already_exists(self, tmpdir, sess):
        """
        Assert no errors are raised when using a cache directory
        that already exists on the filesystem.
        """
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir DOES exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        os.makedirs(tmp_cache, self.cache.dirmode)
        assert os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        assert True  # b/c no exceptions were raised

    def test_key_length(self, sess):
        """
        Hash table keys: most file systems have a 255-character path
        limitation.
            * Make sure the hash method does not produce overly long keys
            * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = 'http://example.org/res?a=1'
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))

    def test_cant_use_dir_and_lock_class(self, tmpdir):
        with pytest.raises(ValueError):
            FileCache(str(tmpdir), use_dir_lock=True, lock_class=object())

    @pytest.mark.parametrize(
        ("value", "expected"),
        [
            (None, LockFile),
            (True, MkdirLockFile),
            (False, LockFile),
        ],
    )
    def test_simple_lockfile_arg(self, tmpdir, value, expected):
        if value is not None:
            cache = FileCache(str(tmpdir), use_dir_lock=value)
        else:
            cache = FileCache(str(tmpdir))
        assert issubclass(cache.lock_class, expected)
        cache.close()

    def test_lock_class(self, tmpdir):
        lock_class = object()
        cache = FileCache(str(tmpdir), lock_class=lock_class)
        assert cache.lock_class is lock_class
        cache.close()

    def test_filecache_with_delete_request(self, tmpdir, sess):
        # verifies issue #155
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        sess.delete(url)
        assert True  # test verifies no exceptions were raised

    def test_filecache_with_put_request(self, tmpdir, sess):
        # verifies issue #155
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        sess.put(url)
        assert True  # test verifies no exceptions were raised
def test_lock_class(self, tmpdir):
    lock_class = object()
    cache = FileCache(str(tmpdir), lock_class=lock_class)
    assert cache.lock_class is lock_class
    cache.close()
class TestStorageFileCache(object):

    @pytest.fixture()
    def sess(self, server, tmpdir):
        self.url = server.application_url
        self.cache = FileCache(str(tmpdir))
        sess = CacheControl(requests.Session(), cache=self.cache)
        return sess

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_filecache_directory_not_exists(self, tmpdir, sess):
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir doesn't exist
        tmp_cache = tmpdir.join("missing", "folder", "name").strpath
        assert not os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        # Now our cache dir does exist
        assert os.path.exists(tmp_cache)

    def test_filecache_directory_already_exists(self, tmpdir, sess):
        """
        Assert no errors are raised when using a cache directory
        that already exists on the filesystem.
        """
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir DOES exist
        tmp_cache = tmpdir.join("missing", "folder", "name").strpath
        os.makedirs(tmp_cache, self.cache.dirmode)
        assert os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        assert True  # b/c no exceptions were raised

    def test_key_length(self, sess):
        """
        Hash table keys: most file systems have a 255-character path
        limitation.
            * Make sure the hash method does not produce overly long keys
            * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = "http://example.org/res?a=1"
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))
def sess(self, server):
    self.url = server.application_url
    self.cache = FileCache(STORAGE_FOLDER)
    sess = CacheControl(requests.Session(), cache=self.cache)
    return sess
class TestStorageFileCache(object):

    @pytest.fixture()
    def sess(self, server, tmpdir):
        self.url = server.application_url
        self.cache = FileCache(str(tmpdir))
        sess = CacheControl(requests.Session(), cache=self.cache)
        return sess

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_filecache_directory_not_exists(self, tmpdir, sess):
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir doesn't exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        assert not os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        # Now our cache dir does exist
        assert os.path.exists(tmp_cache)

    def test_filecache_directory_already_exists(self, tmpdir, sess):
        """
        Assert no errors are raised when using a cache directory
        that already exists on the filesystem.
        """
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir DOES exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        os.makedirs(tmp_cache, self.cache.dirmode)
        assert os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        assert True  # b/c no exceptions were raised

    def test_key_length(self, sess):
        """
        Hash table keys: most file systems have a 255-character path
        limitation.
            * Make sure the hash method does not produce overly long keys
            * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = 'http://example.org/res?a=1'
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))

    def test_max_bytes(self, tmpdir, sess):
        """
        Test that caches the first url but not the second because the
        maximum bytes have been reached for the cache.
        """
        # use a cache with max_bytes set
        max_bytes = 1400
        self.cache = FileCache(str(tmpdir), max_bytes=max_bytes)
        sess = CacheControl(requests.Session(), cache=self.cache)

        url1 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        url2 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        assert url1 != url2

        # fill up the cache with url1
        response = sess.get(url1)
        assert not response.from_cache

        # make sure it got into the cache
        response = sess.get(url1)
        assert response.from_cache

        # do url2 now
        response = sess.get(url2)
        assert not response.from_cache

        # make sure url2 was NOT cached
        response = sess.get(url2)
        assert not response.from_cache

        # clear the cache
        response = sess.delete(url1)
        assert not response.from_cache

        # re-add to cache since bytes should be back to 0
        response = sess.get(url1)
        assert not response.from_cache

        # verify from cache again
        response = sess.get(url1)
        assert response.from_cache
def sess(self, server, tmpdir):
    self.url = server.application_url
    self.cache = FileCache(str(tmpdir))
    sess = CacheControl(requests.Session(), cache=self.cache)
    return sess
def delete(self, key):
    value = self.get(key)
    FileCache.delete(self, key)
    removed_bytes = sys.getsizeof(value)
    if not self.forever:
        self.curr_bytes -= removed_bytes
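Taken together, the __init__, set, and delete overrides shown in the snippets above amount to a size-capped FileCache. A minimal, self-contained sketch of how they fit together; the class name SizeLimitedFileCache and the use of a logging logger (instead of the warnings module) are assumptions for illustration only:

import logging
import sys

import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache

ONE_GIGABYTE = 2 ** 30


class SizeLimitedFileCache(FileCache):
    """FileCache that skips writes once max_bytes would be exceeded."""

    def __init__(self, directory, forever=False, filemode=0o0600,
                 dirmode=0o0700, max_bytes=ONE_GIGABYTE, logger=None):
        FileCache.__init__(self, directory, forever, filemode, dirmode)
        self.max_bytes = max_bytes
        self.curr_bytes = 0
        self.logger = logger or logging.getLogger(__name__)

    def set(self, key, value):
        # Refuse the write if it would push the cache past its byte budget.
        new_bytes = sys.getsizeof(value)
        if self.curr_bytes + new_bytes > self.max_bytes:
            self.logger.warning(
                "Tried adding %d bytes but %d bytes are currently saved in the"
                " cache and max_bytes is set to %d.",
                new_bytes, self.curr_bytes, self.max_bytes)
            return
        FileCache.set(self, key, value)
        self.curr_bytes += new_bytes

    def delete(self, key):
        # Release the deleted entry's bytes so new responses can be cached.
        value = self.get(key)
        FileCache.delete(self, key)
        if not self.forever:
            self.curr_bytes -= sys.getsizeof(value)


# Example: responses that would exceed the remaining budget are fetched
# normally but never written to disk.
session = CacheControl(requests.Session(),
                       cache=SizeLimitedFileCache('.web_cache', max_bytes=1400))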
def sess(self, url, tmpdir):
    self.url = url
    self.cache = FileCache(str(tmpdir))
    sess = CacheControl(requests.Session(), cache=self.cache)
    return sess