def sess(self, url, tmpdir):
     self.url = url
     self.cache = FileCache(str(tmpdir))
     sess = CacheControl(requests.Session(), cache=self.cache)
     yield sess
     # close the session once the test finishes
     sess.close()
Example #2
    def __init__(
            self,
            fetcher=None,  # type: Optional[Fetcher]
            namespaces=None,  # type: Optional[Dict[Text, Text]]
            fileuri=None,  # type: Optional[Text]
            copyfrom=None,  # type: Optional[LoadingOptions]
            schemas=None,  # type: Optional[List[Text]]
            original_doc=None,  # type: Optional[Any]
    ):  # type: (...) -> None
        self.idx = {}  # type: Dict[Text, Text]
        self.fileuri = fileuri  # type: Optional[Text]
        self.namespaces = namespaces
        self.schemas = schemas
        self.original_doc = original_doc
        if copyfrom is not None:
            self.idx = copyfrom.idx
            if fetcher is None:
                fetcher = copyfrom.fetcher
            if fileuri is None:
                self.fileuri = copyfrom.fileuri
            if namespaces is None:
                self.namespaces = copyfrom.namespaces
            if schemas is None:
                self.schemas = copyfrom.schemas

        if fetcher is None:
            import requests
            from cachecontrol.wrapper import CacheControl
            from cachecontrol.caches import FileCache
            from schema_salad.ref_resolver import DefaultFetcher

            if "HOME" in os.environ:
                session = CacheControl(
                    requests.Session(),
                    cache=FileCache(
                        os.path.join(os.environ["HOME"], ".cache", "salad")),
                )
            elif "TMPDIR" in os.environ:
                session = CacheControl(
                    requests.Session(),
                    cache=FileCache(
                        os.path.join(os.environ["TMPDIR"], ".cache", "salad")),
                )
            else:
                session = CacheControl(
                    requests.Session(),
                    cache=FileCache(
                        os.path.join("/tmp", ".cache", "salad")),
                )
            self.fetcher = DefaultFetcher({}, session)  # type: Fetcher
        else:
            self.fetcher = fetcher

        self.vocab = _vocab
        self.rvocab = _rvocab

        if namespaces is not None:
            self.vocab = self.vocab.copy()
            self.rvocab = self.rvocab.copy()
            for k, v in iteritems(namespaces):
                self.vocab[k] = v
                self.rvocab[v] = k
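A minimal usage sketch of the constructor above; the enclosing class name (LoadingOptions) and the module-level _vocab/_rvocab tables are assumptions, since the snippet shows neither:

# Hypothetical usage; LoadingOptions is an assumed name for the class that
# owns the constructor above, and _vocab/_rvocab are assumed module tables.
options = LoadingOptions(namespaces={"edam": "http://edamontology.org/"})
assert options.vocab["edam"] == "http://edamontology.org/"
assert options.rvocab["http://edamontology.org/"] == "edam"

# copyfrom shares the document index and fetcher of an existing instance.
derived = LoadingOptions(copyfrom=options, fileuri="file:///example.cwl")
assert derived.idx is options.idx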
Example #3
    def test_simple_lockfile_arg(self, tmpdir, value, expected):
        if value is not None:
            cache = FileCache(str(tmpdir), use_dir_lock=value)
        else:
            cache = FileCache(str(tmpdir))

        assert issubclass(cache.lock_class, expected)
Example #4
class TestStorageFileCache(object):

    @pytest.fixture()
    def sess(self, server):
        self.url = server.application_url
        self.cache = FileCache(STORAGE_FOLDER)
        sess = CacheControl(requests.Session(), cache=self.cache)
        return sess

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_key_length(self, sess):
        """
        Hash table keys:
           Most file systems have a 255-character path limit.
              * Make sure the hash method does not produce overly long keys
              * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = 'http://example.org/res?a=1'
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))
Example #5
    def test_simple_lockfile_arg(self, tmpdir, value, expected):
        if value is not None:
            cache = FileCache(str(tmpdir), use_dir_lock=value)
        else:
            cache = FileCache(str(tmpdir))

        assert issubclass(cache.lock_class, expected)
        cache.close()
Example #6
    def __init__(self,
                 fetcher=None,
                 namespaces=None,
                 fileuri=None,
                 copyfrom=None,
                 schemas=None):
        if copyfrom is not None:
            self.idx = copyfrom.idx
            if fetcher is None:
                fetcher = copyfrom.fetcher
            if fileuri is None:
                fileuri = copyfrom.fileuri
            if namespaces is None:
                namespaces = copyfrom.namespaces
            if schemas is None:
                schemas = copyfrom.schemas
        else:
            self.idx = {}

        if fetcher is None:
            import os
            import requests
            from cachecontrol.wrapper import CacheControl
            from cachecontrol.caches import FileCache
            from schema_salad.ref_resolver import DefaultFetcher
            if "HOME" in os.environ:
                session = CacheControl(requests.Session(),
                                       cache=FileCache(
                                           os.path.join(
                                               os.environ["HOME"], ".cache",
                                               "salad")))
            elif "TMPDIR" in os.environ:
                session = CacheControl(requests.Session(),
                                       cache=FileCache(
                                           os.path.join(
                                               os.environ["TMPDIR"], ".cache",
                                               "salad")))
            else:
                session = CacheControl(requests.Session(),
                                       cache=FileCache(
                                           os.path.join(
                                               "/tmp", ".cache", "salad")))
            self.fetcher = DefaultFetcher({}, session)
        else:
            self.fetcher = fetcher

        self.fileuri = fileuri

        self.vocab = _vocab
        self.rvocab = _rvocab
        self.namespaces = namespaces
        self.schemas = schemas

        if namespaces is not None:
            self.vocab = self.vocab.copy()
            self.rvocab = self.rvocab.copy()
            for k, v in iteritems(namespaces):
                self.vocab[k] = v
                self.rvocab[v] = k
Example #7
 def __init__(self,
              directory,
              forever=False,
              filemode=0o0600,
              dirmode=0o0700,
              max_bytes=ONE_GIGABYTE,
              logger=warnings):
     FileCache.__init__(self, directory, forever, filemode, dirmode)
     self.max_bytes = max_bytes
     self.curr_bytes = 0
     self.logger = logger
Example #8
    def set(self, key, value):
        new_bytes = sys.getsizeof(value)
        total = (self.curr_bytes + new_bytes)
        if total > self.max_bytes:
            message = "Tried adding %d bytes but %d bytes are currently saved" \
                      " in the cache and the max_bytes is set to %d.\n" % \
                      (new_bytes, self.curr_bytes, self.max_bytes)
            self.logger.warn(message)
            return

        FileCache.set(self, key, value)

        self.curr_bytes += new_bytes
Example #9
    def set(self, key, value):
        new_bytes = sys.getsizeof(value)
        total = (self.curr_bytes + new_bytes)
        if total > self.max_bytes:
            message = ("Tried adding %d bytes but %d bytes are currently saved"
                       " in the cache and the max_bytes is set to %d."
                       % (new_bytes, self.curr_bytes, self.max_bytes))
            self.logger.warning(message)
            return

        FileCache.set(self, key, value)

        self.curr_bytes += new_bytes
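Assembled from the constructor in Example #7, the set override above, and the delete override in Example #42, a byte-capped file cache might look like the following sketch; the class name MaxBytesFileCache and the value of ONE_GIGABYTE are assumptions carried over from those fragments:

import sys
import warnings

from cachecontrol.caches import FileCache

ONE_GIGABYTE = 1024 * 1024 * 1024  # assumed value of the Example #7 constant


class MaxBytesFileCache(FileCache):  # hypothetical name
    def __init__(self, directory, forever=False, filemode=0o0600,
                 dirmode=0o0700, max_bytes=ONE_GIGABYTE, logger=warnings):
        FileCache.__init__(self, directory, forever, filemode, dirmode)
        self.max_bytes = max_bytes
        self.curr_bytes = 0
        self.logger = logger

    def set(self, key, value):
        # Refuse writes that would push the cache past its byte budget.
        new_bytes = sys.getsizeof(value)
        if self.curr_bytes + new_bytes > self.max_bytes:
            self.logger.warn(
                "Tried adding %d bytes but %d bytes are currently saved"
                " in the cache and the max_bytes is set to %d."
                % (new_bytes, self.curr_bytes, self.max_bytes))
            return
        FileCache.set(self, key, value)
        self.curr_bytes += new_bytes

    def delete(self, key):
        # Reclaim the entry's byte count unless entries are kept forever.
        value = self.get(key)
        FileCache.delete(self, key)
        if not self.forever:
            self.curr_bytes -= sys.getsizeof(value)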
Example #10
 def __init__(
         self,
         user_auth,
         language="en-gb",
         user_object=None,
         kirkes_base_url="https://kirkes.finna.fi",
         kirkes_sessioncheck_url="/AJAX/JSON?method=getUserTransactions"):
     self.user_auth = user_auth
     self.language = language
     self.baseUrl = kirkes_base_url
     self.user_object = user_object
     self.sessionCheck_path = kirkes_sessioncheck_url
     self.sessionHttp = requests.Session()
     self.cached_sessionHttp = CacheControl(
         self.sessionHttp,
         cache=FileCache(os.path.join(settings.BASE_DIR, '.webcache')))
     cookie_obj = requests.cookies.create_cookie(
         domain=self.getBaseURLDomainName(),
         name='language',
         value=self.language)
     self.sessionHttp.cookies.set_cookie(cookie_obj)
     self.cached_sessionHttp.cookies.set_cookie(cookie_obj)
     retry = Retry(connect=5, backoff_factor=0.5)
     adapter = HTTPAdapter(max_retries=retry)
     self.cached_sessionHttp.mount('http://', adapter)
     self.sessionHttp.mount('https://', adapter)
     if settings.USE_PROXY:
         get_tor_session(self.sessionHttp)
         get_tor_session(self.cached_sessionHttp)
Example #11
class Settings:
    do_update_wikidata = True
    # Don't activate this, it's most likely broken
    do_update_wikipedia = False

    sparql_file = "free_software_items.rq"
    oauth_token_file = "github_oauth_token.txt"

    # pywikibot is too stupid to cache the calendar model, so let's do this manually
    calendarmodel = pywikibot.Site().data_repository().calendarmodel()
    wikidata_repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    repo_regex = re.compile(r"https://github.com/[^/]+/[^/]+")
    version_regex = re.compile(r"\d+(\.\d+)+")
    unmarked_prerelease_regex = re.compile(
        r"[ -._\d](b|r|rc|beta|alpha)([ .\d].*)?$", re.IGNORECASE)

    cached_session = CacheControl(requests.Session(),
                                  cache=FileCache('cache', forever=True),
                                  heuristic=LastModified())

    properties = {
        "software version": "P348",
        "publication date": "P577",
        "retrieved": "P813",
        "reference URL": "P854",
        "official website": "P856",
        "source code repository": "P1324",
    }
Example #12
    def __init__(self,
                 api_key=None,
                 locale=None,
                 anonymize=False,
                 exclude_episodes=False,
                 user_agent=None,
                 cache=None,
                 proxy_uri=None,
                 verify_ssl=True):
        self.api_key = api_key or SHA1_KEY
        self.timestamp = time.mktime(datetime.date.today().timetuple())
        self.user_agent = user_agent or random.choice(USER_AGENTS)
        self.locale = locale or 'en_US'
        self.exclude_episodes = exclude_episodes
        self.caching_enabled = cache is True
        self.proxy_uri = proxy_uri or DEFAULT_PROXY_URI
        self.anonymize = anonymize
        self.verify_ssl = verify_ssl
        self.session = requests

        if self.caching_enabled:
            warnings.warn('caching will be removed in version 5.0.0 '
                          'due to not being thread safe')
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache('.imdbpie_cache'))
Example #13
class Settings:
    do_update_wikidata = True
    # Don't activate this, it's most likely broken
    do_update_wikipedia = False

    normalize_url = True

    sparql_file = "free_software_items.rq"

    # pywikibot is too stupid to cache the calendar model, so let's do this manually
    calendarmodel = pywikibot.Site().data_repository().calendarmodel()
    wikidata_repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    repo_regex = re.compile(r"^[a-z]+://github.com/[^/]+/[^/]+/?$")

    cached_session = CacheControl(
        requests.Session(),
        cache=FileCache("cache", forever=True),
        heuristic=LastModified(),
    )

    properties = {
        "software version": "P348",
        "publication date": "P577",
        "retrieved": "P813",
        "reference URL": "P854",
        "official website": "P856",
        "source code repository": "P1324",
        "title": "P1476",
        "protocol": "P2700",
    }
Example #14
def main(argv):
    parser = argparse.ArgumentParser(description='Create or update cfn resource schema')
    parser.add_argument('--update', action='store_true')
    parser.add_argument('--type', metavar='TYPE',
                        help='Restrict parsing resource type properties only to'
                        ' type TYPE. Example: --type AWS::ApiGateway::RestApi')
    parser.add_argument('dest', nargs='?', help='Write resulting schema into FILE'
                        ' instead of just printing it')

    args = parser.parse_args(argv[1:])

    sess = CacheControl(requests.Session(),
                        cache=FileCache('.web_cache'))
    requests.get = sess.get

    stage1 = 'resource-stage1.json'
    if args.update:
        if not args.dest:
            print('Error: if --update is given, `dest` must be'
                  ' specified too', file=sys.stderr)
            return 2
        stage1_schema = tools.load(stage1)
        resource_schema = tools.load(args.dest)
        resource_schema['definitions']['resource_template'] = \
            stage1_schema['definitions']['resource_template']
    else:
        resource_schema = tools.load(stage1)

    resource_type_names = tools.get_all_resource_type_names()
    tools.update_all_resource_patterns_by_name(
        resource_schema,
        resource_type_names
    )

    if args.type:
        resource_type_names = [args.type]

    for resource_type_name in resource_type_names:
        print(resource_type_name, file=sys.stderr)
        resource_properties.set_resource_properties(resource_schema, resource_type_name)

    del resource_schema['definitions']['resource_template']

    all_properties = resource_properties.all_res_properties()
    resource_schema['definitions']['property_types'] = all_properties
    for rpt_name, rpt_schema in all_properties.items():
        print(rpt_name, file=sys.stderr)
        resource_properties.set_resource_property_type_properties(
            resource_schema,
            rpt_name
        )

    tweak_resource_schema.apply_all_tweaks(resource_schema)

    if args.dest:
        tools.write(resource_schema, args.dest)
    else:
        print(tools.print_(resource_schema))

    return 0
Example #15
    def open(self):
        global SESSION
        if SESSION is None:
            SESSION = CacheControl(Session(),
                                   cache=FileCache(SESSION_CACHE_PATH))

        try:
            self._response = SESSION.get(self.uri, headers=self.headers)
        except InvalidSchema as e:
            raise DocumentNotFoundException(
                u'document not found: "{0}"'.format(self.uri), cause=e)
        except ConnectionError as e:
            raise LoaderException(u'request connection error: "{0}"'.format(
                self.uri),
                                  cause=e)
        except Exception as e:
            raise LoaderException(u'request error: "{0}"'.format(self.uri),
                                  cause=e)

        status = self._response.status_code
        if status == 404:
            self._response = None
            raise DocumentNotFoundException(
                u'document not found: "{0}"'.format(self.uri))
        elif status != 200:
            self._response = None
            raise LoaderException(u'request error {0:d}: "{1}"'.format(
                status, self.uri))
Example #16
    def fetch(self):
        feed = None
        if InformantConfig().get_argv_use_cache():
            cachefile = InformantConfig().get_cachefile()
            # loosen the umask so cached files get the intended permissions
            os.umask(0o0002)
            try:
                session = CacheControl(requests.Session(),
                                       cache=FileCache(cachefile,
                                                       filemode=0o0664,
                                                       dirmode=0o0775))
                feed = feedparser.parse(session.get(self.url).content)
            except Exception as e:
                ui.err_print('Unable to read cache information: {}'.format(e))
                feed = feedparser.parse(self.url)
        else:
            feed = feedparser.parse(self.url)

        if feed.bozo:
            ui.err_print('Encountered feed error: {}'.format(
                feed.bozo_exception))
            sys.exit(255)
        else:
            return feed
Example #17
def requests_session(nocache=False):
    if nocache:
        return requests.Session()
    return CacheControl(
        requests.Session(),
        cache=FileCache(CACHE_FILENAME)
    )
Example #18
def get_events_from_icalendars():
    global now, midnight

    now = localtz.localize(datetime.datetime.now())
    midnight = localtz.localize(datetime.datetime.combine(now, datetime.time(0,0,0)))

    cz = Calzone()

    session = FuturesSession()
    session.mount('https://', CacheControlAdapter(cache=FileCache('.webcache'), heuristic=ForceCacheHeuristic()))

    cals = {k: session.get(u) for k,u in calendars.items()}

    concurrent.futures.wait(cals.values())

    for k,req in cals.items():
        try:
            cz.load(req.result().text)
        except Exception as err:
            print("Failed to load calendar '{}'".format(k))
            print(err)

    try:
        events = cz.get_events(midnight, midnight + datetime.timedelta(days=90))
    except Exception as e:
        print(e)

    events.sort(key=lambda e: e.start)

    return events
Example #19
def cli(ctx, url, token):
    spinner = Halo(text="Login and fetch forks", spinner="dots")
    spinner.start()

    if token:
        gh = github3.login(token=token)
    else:
        user = click.prompt("username",
                            hide_input=False,
                            confirmation_prompt=False)
        password = click.prompt("Password",
                                hide_input=True,
                                confirmation_prompt=True)
        gh = github3.login(user, password=password)
    cachecontrol.CacheControl(gh.session,
                              cache=FileCache(".fork_work_cache"),
                              heuristic=OneDayHeuristic())

    login, repo = urlparse(url).path[1:].split("/")
    repository = gh.repository(login, repo)
    forks = repository.forks()

    spinner.stop()
    RepoCtx = namedtuple("Repo", ["repository", "forks", "gh"])
    ctx.obj = RepoCtx(repo, forks, gh)
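The OneDayHeuristic passed to cachecontrol.CacheControl above is not defined in the snippet; a plausible sketch, following the BaseHeuristic pattern from the cachecontrol documentation, pins expiry one day past the server's Date header:

from datetime import datetime, timedelta
from email.utils import parsedate

from cachecontrol.heuristics import BaseHeuristic, datetime_to_header, expire_after


class OneDayHeuristic(BaseHeuristic):
    def update_headers(self, response):
        # Expire cached entries one day after the origin's Date header.
        date = parsedate(response.headers["date"])
        expires = expire_after(timedelta(days=1), date=datetime(*date[:6]))
        return {
            "expires": datetime_to_header(expires),
            "cache-control": "public",
        }

    def warning(self, response):
        return "110 - automatically cached; response may be stale"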
Example #20
 def test_file_cache_recognizes_consumed_file_handle(self, url):
     s = CacheControl(Session(), FileCache("web_cache"))
     the_url = url + "cache_60"
     s.get(the_url)
     r = s.get(the_url)
     assert r.from_cache
     s.close()
Example #21
class Settings:
    do_update_wikidata = True

    # Read also tags if a project doesn't use githubs releases
    read_tags = True

    normalize_repo_url = True

    blacklist_page = "User:Github-wiki-bot/Exceptions"
    whitelist_page = "User:Github-wiki-bot/Whitelist"
    blacklist: List[str] = []
    whitelist: List[str] = []
    sparql_file = "free_software_items.rq"

    license_sparql_file = "free_licenses.rq"
    licenses: Dict[str, str] = {}

    # pywikibot is too stupid to cache the calendar model, so let's do this manually
    calendarmodel = pywikibot.Site().data_repository().calendarmodel()
    wikidata_repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    repo_regex = re.compile(r"^[a-z]+://github.com/[^/]+/[^/]+/?$")

    cached_session: requests.Session = CacheControl(requests.Session(),
                                                    cache=FileCache("cache"))
Example #22
def main(argv):
    sess = CacheControl(requests.Session(),
                        cache=FileCache('.web_cache'))
    requests.get = sess.get

    schema = tools.load('schema.json')
    schema['definitions']['Parameter']['properties'] = parse_parameters()
    tools.write(schema, 'schema.json')
Example #23
    def get_reader(self):
        sess = CacheControl(requests.Session(), cache=FileCache(gettempdir()))
        req = sess.get(self.file)

        # if the response is not 200, an exception will be raised
        req.raise_for_status()

        return io.BufferedReader(io.BytesIO(req.content))
Example #24
def get_cached_session(caching=True):
    if not caching:
        return requests.Session()

    CACHE_DIR = 'web_cache'
    return CacheControl(requests.Session(),
                        cache=FileCache(CACHE_DIR),
                        heuristic=LastModifiedNoDate(require_date=False))
Example #25
 def session(self):
     if self._session is None:
         self._session = real_requests.Session()
         if CacheControlAdapter:
             adapter = CacheControlAdapter(cache=FileCache(".webcache"))
             self._session.mount("http://", adapter)
             self._session.mount("https://", adapter)
             print("Caching to .webcache")
     return self._session
Example #26
    def __init__(self):
        session = requests.Session()
        self.rootURL = "https://api.spiget.org/v2/"
        session.headers['User-Agent'] = "{} v{}".format(metadata.NAME, metadata.VERSION)

        self.session = CacheControl(
            session,
            cache=FileCache('.spl/cache')
        )
Example #27
def get_cached_session(caching=True):
    if not caching:
        return requests.Session()
    # For some reason, CacheControl works quite badly in concurrent
    # environments, so return an uncached session here as well.
    return requests.Session()

    CACHE_DIR = 'web_cache'
    return CacheControl(requests.Session(),
                        cache=FileCache(CACHE_DIR),
                        heuristic=_LastModifiedNoDate(require_date=False))
Example #28
    def __init__(self, headers=None, cookies=None, cache_name=None, delay=1, expire_hours=12, as_string=False):
        '''
        Base class for common scraping tasks
        Args:
            headers: dict of headers
            cookies: cookiejar object
            cache_name: should be full path
            delay: int (be polite!!!)
            expire_hours: int - default 12
            as_string: get string rather than parsed json
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())

        if not cookies:
            try:
                import cookielib
                cookies = cookielib.MozillaCookieJar()
            except (NameError, ImportError) as e:
                try:
                    import http.cookiejar
                    cookies = http.cookiejar.MozillaCookieJar()
                except Exception as e:
                    pass

        _s = requests.Session()
        _s.cookies = cookies

        if headers:
            _s.headers.update(headers)
        else:
            _s.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'})

        if cache_name:
            if '/' not in cache_name:
                cache_name = os.path.join('/tmp', cache_name)
            try:
                from cachecontrol import CacheControlAdapter
                from cachecontrol.heuristics import ExpiresAfter
                from cachecontrol.caches import FileCache
                _s.mount('http://', CacheControlAdapter(
                    cache=FileCache(cache_name),
                    cache_etags=False,
                    heuristic=ExpiresAfter(hours=expire_hours)))
            except ImportError as e:
                try:
                    import requests_cache
                    requests_cache.install_cache(cache_name)
                except Exception:
                    pass

        self.s = _s
        self.urls = []
        self.as_string = as_string

        if delay > 0:
            self.delay = delay
        else:
            self.delay = None
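A hypothetical usage of the scraper constructor above (the enclosing class name, here Scraper, is not shown in the snippet); note that relative cache names are placed under /tmp by the constructor itself:

# Hypothetical usage; Scraper is an assumed name for the enclosing class.
scraper = Scraper(cache_name='nba_cache', delay=2, expire_hours=6)
response = scraper.s.get('http://example.org/stats')  # goes through the cache adapter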
Example #29
 def __init__(self, destination, staging, s3_url, dry_run, cache):
     self.destination = destination
     self.staging = staging
     self.s3_url = s3_url
     self.dry_run = dry_run
     if cache:
         self.info(f"Using cache {cache}")
         self.fetcher = CacheControl(requests.session(), cache=FileCache(cache))
     else:
         self.info("Making uncached requests")
         self.fetcher = requests
Example #30
    def __init__(self,
                 ctx,
                 schemagraph=None,
                 foreign_properties=None,
                 idx=None,
                 cache=None,
                 session=None):
        # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None
        normalize = lambda url: urlparse.urlsplit(url).geturl()
        if idx is not None:
            self.idx = idx
        else:
            self.idx = NormDict(normalize)

        self.ctx = {}  # type: Loader.ContextType
        if schemagraph is not None:
            self.graph = schemagraph
        else:
            self.graph = rdflib.graph.Graph()

        if foreign_properties is not None:
            self.foreign_properties = foreign_properties
        else:
            self.foreign_properties = set()

        if cache is not None:
            self.cache = cache
        else:
            self.cache = {}

        self.session = None  # type: requests.sessions.Session
        if session is not None:
            self.session = session
        else:
            self.session = CacheControl(requests.Session(),
                                        cache=FileCache(
                                            os.path.join(
                                                os.environ["HOME"], ".cache",
                                                "salad")))

        self.url_fields = None  # type: Set[unicode]
        self.scoped_ref_fields = None  # type: Dict[unicode, int]
        self.vocab_fields = None  # type: Set[unicode]
        self.identifiers = None  # type: Set[unicode]
        self.identity_links = None  # type: Set[unicode]
        self.standalone = None  # type: Set[unicode]
        self.nolinkcheck = None  # type: Set[unicode]
        self.vocab = {}  # type: Dict[unicode, unicode]
        self.rvocab = {}  # type: Dict[unicode, unicode]
        self.idmap = None  # type: Dict[unicode, Any]
        self.mapPredicate = None  # type: Dict[unicode, unicode]
        self.type_dsl_fields = None  # type: Set[unicode]

        self.add_context(ctx)
Example #31
def amalgama_lyrics(artist, song):
    url = amalgama.get_url(artist, song)
    try:
        cached_sess = CacheControl(sess, cache=FileCache('.amalgama'))
        response = cached_sess.get(url)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print(f'{artist}-{song} not found in amalgama {url}')
        return None
    text = amalgama.get_html(response.text)
    return text
Example #32
def get_session():
    CACHE_FOLDER.mkdir(exist_ok=True)
    cache = FileCache(str(CACHE_FOLDER), forever=True)
    cache.set("foo", b"bar")
    assert cache.get("foo") == b"bar"
    session = RateLimitingSession()
    # session.headers.update({"x-api-key": "something-something-darkside"})
    session.mount(
        "https://www.metlink.org.nz/",
        CacheControlAdapter(heuristic=BetterExpiresAfter(days=7), cache=cache),
    )
    session.mount(
        METLINK_API_URL_PREFIX,
        CacheControlAdapter(heuristic=BetterExpiresAfter(days=1), cache=cache),
    )
    session.mount(
        METLINK_API_URL_PREFIX + "ServiceLocation/",
        CacheControlAdapter(heuristic=BetterExpiresAfter(seconds=90),
                            cache=cache),
    )
    return session
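BetterExpiresAfter is not defined in the snippet; assuming it refines the stock ExpiresAfter heuristic, which takes the same timedelta-style keyword arguments, the stock class is a drop-in for experimenting with this setup:

from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.caches import FileCache
from cachecontrol.heuristics import ExpiresAfter

# Stock heuristic with the same kwargs as the assumed BetterExpiresAfter.
cache = FileCache("cache", forever=True)
adapter = CacheControlAdapter(heuristic=ExpiresAfter(days=7), cache=cache)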
Example #33
 def __init__(self, destination: Path, staging: Path, s3_url: str, dry_run: bool, is_nightly_enabled: bool,
              cache: Optional[Path]):
     self.destination = destination
     self.staging = staging
     self.s3_url = s3_url
     self.dry_run = dry_run
     self.is_nightly_enabled = is_nightly_enabled
     if cache:
         self.info(f"Using cache {cache}")
         self.fetcher = CacheControl(requests.session(), cache=FileCache(cache))
     else:
         self.info("Making uncached requests")
         self.fetcher = requests
Example #34
    def test_max_bytes(self, tmpdir, sess):
        """
        Test that the first URL is cached but the second is not,
        because the cache has reached its maximum byte size.
        """
        # use a cache with max_bytes set
        max_bytes = 1400
        self.cache = FileCache(str(tmpdir), max_bytes=max_bytes)
        sess = CacheControl(requests.Session(), cache=self.cache)

        url1 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        url2 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        assert url1 != url2

        # fill up the cache with url1
        response = sess.get(url1)
        assert not response.from_cache

        # make sure it got into the cache
        response = sess.get(url1)
        assert response.from_cache

        # do url2 now
        response = sess.get(url2)
        assert not response.from_cache

        # make sure url2 was NOT cached
        response = sess.get(url2)
        assert not response.from_cache

        # clear the cache
        response = sess.delete(url1)
        assert not response.from_cache

        # re-add to cache since bytes should be back to 0
        response = sess.get(url1)
        assert not response.from_cache

        # verify from cache again
        response = sess.get(url1)
        assert response.from_cache
Example #35
class TestStorageFileCache(object):

    @pytest.fixture()
    def sess(self, url, tmpdir):
        self.url = url
        self.cache = FileCache(str(tmpdir))
        sess = CacheControl(requests.Session(), cache=self.cache)
        yield sess
        # close the session once the test finishes
        sess.close()

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_filecache_directory_not_exists(self, tmpdir, sess):
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir doesn't exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        assert not os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        # Now our cache dir does exist
        assert os.path.exists(tmp_cache)

    def test_filecache_directory_already_exists(self, tmpdir, sess):
        """
        Assert no errors are raised when using a cache directory
        that already exists on the filesystem.
        """
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir DOES exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        os.makedirs(tmp_cache, self.cache.dirmode)

        assert os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        assert True  # b/c no exceptions were raised

    def test_key_length(self, sess):
        """
        Hash table keys:
           Most file systems have a 255-character path limit.
              * Make sure the hash method does not produce overly long keys
              * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = 'http://example.org/res?a=1'
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))

    def test_cant_use_dir_and_lock_class(self, tmpdir):
        with pytest.raises(ValueError):
            FileCache(str(tmpdir), use_dir_lock=True, lock_class=object())

    @pytest.mark.parametrize(
        ("value", "expected"),
        [
            (None, LockFile),
            (True, MkdirLockFile),
            (False, LockFile),
        ],
    )
    def test_simple_lockfile_arg(self, tmpdir, value, expected):
        if value is not None:
            cache = FileCache(str(tmpdir), use_dir_lock=value)
        else:
            cache = FileCache(str(tmpdir))

        assert issubclass(cache.lock_class, expected)
        cache.close()

    def test_lock_class(self, tmpdir):
        lock_class = object()
        cache = FileCache(str(tmpdir), lock_class=lock_class)
        assert cache.lock_class is lock_class
        cache.close()
        
    def test_filecache_with_delete_request(self, tmpdir, sess):
        # verifies issue #155
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        sess.delete(url)
        assert True  # test verifies no exceptions were raised

    def test_filecache_with_put_request(self, tmpdir, sess):
        # verifies issue #155
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        sess.put(url)
        assert True  # test verifies no exceptions were raised
Example #36
 def test_lock_class(self, tmpdir):
     lock_class = object()
     cache = FileCache(str(tmpdir), lock_class=lock_class)
     assert cache.lock_class is lock_class
     cache.close()
Example #37
class TestStorageFileCache(object):
    @pytest.fixture()
    def sess(self, server, tmpdir):
        self.url = server.application_url
        self.cache = FileCache(str(tmpdir))
        sess = CacheControl(requests.Session(), cache=self.cache)
        return sess

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_filecache_directory_not_exists(self, tmpdir, sess):
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir doesn't exist
        tmp_cache = tmpdir.join("missing", "folder", "name").strpath
        assert not os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        # Now our cache dir does exist
        assert os.path.exists(tmp_cache)

    def test_filecache_directory_already_exists(self, tmpdir, sess):
        """
        Assert no errors are raised when using a cache directory
        that already exists on the filesystem.
        """
        url = self.url + "".join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir DOES exist
        tmp_cache = tmpdir.join("missing", "folder", "name").strpath
        os.makedirs(tmp_cache, self.cache.dirmode)

        assert os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        assert True  # b/c no exceptions were raised

    def test_key_length(self, sess):
        """
        Hash table keys:
           Most file systems have a 255-character path limit.
              * Make sure the hash method does not produce overly long keys
              * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = "http://example.org/res?a=1"
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))
Example #38
 def sess(self, server):
     self.url = server.application_url
     self.cache = FileCache(STORAGE_FOLDER)
     sess = CacheControl(requests.Session(), cache=self.cache)
     return sess
Example #39
class TestStorageFileCache(object):

    @pytest.fixture()
    def sess(self, server, tmpdir):
        self.url = server.application_url
        self.cache = FileCache(str(tmpdir))
        sess = CacheControl(requests.Session(), cache=self.cache)
        return sess

    def test_filecache_from_cache(self, sess):
        response = sess.get(self.url)
        assert not response.from_cache
        response = sess.get(self.url)
        assert response.from_cache

    def test_filecache_directory_not_exists(self, tmpdir, sess):
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir doesn't exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        assert not os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        # Now our cache dir does exist
        assert os.path.exists(tmp_cache)

    def test_filecache_directory_already_exists(self, tmpdir, sess):
        """
        Assert no errors are raised when using a cache directory
        that already exists on the filesystem.
        """
        url = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))

        # Make sure our cache dir DOES exist
        tmp_cache = tmpdir.join('missing', 'folder', 'name').strpath
        os.makedirs(tmp_cache, self.cache.dirmode)

        assert os.path.exists(tmp_cache)

        self.cache.directory = tmp_cache

        # trigger a cache save
        sess.get(url)

        assert True  # b/c no exceptions were raised

    def test_key_length(self, sess):
        """
        Hash table keys:
           Most file systems have a 255-character path limit.
              * Make sure the hash method does not produce overly long keys
              * Ideally the hash method generates fixed-length keys
        """
        url0 = url1 = 'http://example.org/res?a=1'
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))

    def test_max_bytes(self, tmpdir, sess):
        """
        Test that the first URL is cached but the second is not,
        because the cache has reached its maximum byte size.
        """
        # use a cache with max_bytes set
        max_bytes = 1400
        self.cache = FileCache(str(tmpdir), max_bytes=max_bytes)
        sess = CacheControl(requests.Session(), cache=self.cache)

        url1 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        url2 = self.url + ''.join(sample(string.ascii_lowercase, randint(2, 4)))
        assert url1 != url2

        # fill up the cache with url1
        response = sess.get(url1)
        assert not response.from_cache

        # make sure it got into the cache
        response = sess.get(url1)
        assert response.from_cache

        # do url2 now
        response = sess.get(url2)
        assert not response.from_cache

        # make sure url2 was NOT cached
        response = sess.get(url2)
        assert not response.from_cache

        # clear the cache
        response = sess.delete(url1)
        assert not response.from_cache

        # re-add to cache since bytes should be back to 0
        response = sess.get(url1)
        assert not response.from_cache

        # verify from cache again
        response = sess.get(url1)
        assert response.from_cache
Example #40
 def __init__(self, directory, forever=False, filemode=0o0600,
              dirmode=0o0700, max_bytes=ONE_GIGABYTE, logger=warnings):
     FileCache.__init__(self, directory, forever, filemode, dirmode)
     self.max_bytes = max_bytes
     self.curr_bytes = 0
     self.logger = logger
Example #41
 def sess(self, server, tmpdir):
     self.url = server.application_url
     self.cache = FileCache(str(tmpdir))
     sess = CacheControl(requests.Session(), cache=self.cache)
     return sess
Example #42
 def delete(self, key):
     value = self.get(key)
     FileCache.delete(self, key)
     removed_bytes = sys.getsizeof(value)
     if not self.forever:
         self.curr_bytes -= removed_bytes
Example #43
 def sess(self, url, tmpdir):
     self.url = url
     self.cache = FileCache(str(tmpdir))
     sess = CacheControl(requests.Session(), cache=self.cache)
     return sess