Example #1
File: utils.py Project: leifj/pyFF
def url_get(url):
    """
    Download a URL using a cache and return the response object
    :param url:
    :return:
    """
    s = None
    info = dict()

    if 'file://' in url:
        s = requests.session()
        s.mount('file://', FileAdapter())
    else:
        s = CachedSession(cache_name="pyff_cache",
                          backend=config.request_cache_backend,
                          expire_after=config.request_cache_time,
                          old_data_on_error=True)
    headers = {'User-Agent': "pyFF/{}".format(__version__), 'Accept': '*/*'}
    try:
        r = s.get(url, headers=headers, verify=False, timeout=config.request_timeout)
    except IOError as ex:
        s = requests.Session()
        r = s.get(url, headers=headers, verify=False, timeout=config.request_timeout)

    if six.PY2:
        r.encoding = "utf-8"

    log.debug("url_get({}) returns {} chrs encoded as {}".format(url, len(r.content), r.encoding))

    if config.request_override_encoding is not None:
        r.encoding = config.request_override_encoding

    return r
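For context, the excerpt above assumes roughly these imports: FileAdapter comes from the third-party requests-file package, CachedSession from requests-cache, and config, log, and __version__ are pyFF module globals (a sketch, not pyFF's exact import list):

import requests
import six
from requests_cache import CachedSession
from requests_file import FileAdapter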
Example #2
    @mock.patch("requests_cache.core.datetime")
    def test_return_old_data_on_error(self, datetime_mock):
        datetime_mock.utcnow.return_value = datetime.utcnow()
        expire_after = 100
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=True, expire_after=expire_after)
        header = "X-Tst"

        def get(n):
            return s.get(url, headers={header: n}).json()["headers"][header]

        get("expired")
        self.assertEquals(get("2"), "expired")
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)

        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            self.assertEquals(get("3"), "expired")

        with mock.patch("requests_cache.core.OriginalSession.send") as send_mock:
            resp_mock = requests.Response()
            request = requests.Request("GET", url)
            resp_mock.request = request.prepare()
            resp_mock.status_code = 400
            resp_mock._content = '{"other": "content"}'
            send_mock.return_value = resp_mock
            self.assertEquals(get("3"), "expired")

            resp_mock.status_code = 200
            self.assertIs(s.get(url).content, resp_mock.content)

        # default behaviour
        datetime_mock.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=False, expire_after=100)
        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            with self.assertRaises(Exception):
                s.get(url)
Example #4
    def __init__(self):
        self.settings = EsiSettings.getInstance()
        self.server_base: ApiBase = supported_servers[self.settings.get("server")]

        # session request stuff
        self._session = Session()
        self._basicHeaders = {
            'Accept': 'application/json',
            'User-Agent': (
                'pyfa v{}'.format(config.version)
            )
        }
        self._session.headers.update(self._basicHeaders)
        self._session.proxies = NetworkSettings.getInstance().getProxySettingsInRequestsFormat()

        # Set up cached session. This is only used for SSO meta data for now, but can be expanded to actually handle
        # various ESI caching (using ETag, for example) in the future
        cached_session = CachedSession(
            os.path.join(config.savePath, config.ESI_CACHE),
            backend="sqlite",
            cache_control=True,                # Use Cache-Control headers for expiration, if available
            expire_after=timedelta(days=1),    # Otherwise expire responses after one day
            stale_if_error=True,               # In case of request errors, use stale cache data if possible
        )
        cached_session.headers.update(self._basicHeaders)
        cached_session.proxies = NetworkSettings.getInstance().getProxySettingsInRequestsFormat()

        meta_call = cached_session.get("https://%s/.well-known/oauth-authorization-server" % self.server_base.sso)
        meta_call.raise_for_status()
        self.server_meta = meta_call.json()

        jwks_call = cached_session.get(self.server_meta["jwks_uri"])
        jwks_call.raise_for_status()
        self.jwks = jwks_call.json()
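The cache_control, expire_after, and stale_if_error arguments above are standard requests-cache session options. A minimal sketch of the same pattern outside pyfa (the httpbin URL and cache name are just placeholders):

from datetime import timedelta
from requests_cache import CachedSession

session = CachedSession(
    'demo_cache',
    backend='sqlite',
    cache_control=True,               # honor Cache-Control response headers
    expire_after=timedelta(days=1),   # fallback expiration otherwise
    stale_if_error=True,              # serve stale data if a request fails
)
r1 = session.get('https://httpbin.org/get')
r2 = session.get('https://httpbin.org/get')
assert r2.from_cache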
Example #5
def get_page_url():
    """
    Initial function to get the list of page URLs for each letter of the alphabet.
    :return: list of urls
    """
    page_url_list = []
    url = "http://devri.bzh/dictionnaire/a/"
    session = CachedSession()
    page = session.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    letter_list = soup.find_all("a", class_="enfant")
    for i in tqdm(letter_list):
        url_letter = "http://devri.bzh" + i["href"]
        page_letter = session.get(url_letter)
        soup_letter = BeautifulSoup(page_letter.content, "html.parser")
        page_num = soup_letter.find("li", class_=["MarkupPagerNavLast MarkupPagerNavLastNum", "MarkupPagerNavLastNum"])
        try:
            page_num = page_num.a.text
            url_list = [url_letter]
            for j in range(2, int(page_num) + 1):
                url_list.append(url_letter + f"page{j}")
            page_url_list += url_list
        except AttributeError:
            pass
    # add page for alphabet z manually
    page_url_list += ["http://devri.bzh/dictionnaire/z/"]
    return page_url_list
Example #6
 def test_throttle_cache(self):
     url = httpbin('get')
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
     s.throttle(url, 0.5) # one every 2 seconds
     r = s.get(url)
     time.sleep(0.6)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, 1)
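Note that session.throttle() does not appear in mainline requests-cache releases, so this test presumably targets a fork. A rough equivalent using a plain wrapper (a sketch; the class and its names are hypothetical):

import time

class ThrottledSession:
    """Wrap a session and enforce a minimum interval between requests."""
    def __init__(self, session, min_interval):
        self.session = session
        self.min_interval = min_interval
        self._last = 0.0

    def get(self, url, **kwargs):
        # Sleep just long enough to keep the requested pace.
        wait = self.min_interval - (time.time() - self._last)
        if wait > 0:
            time.sleep(wait)
        self._last = time.time()
        return self.session.get(url, **kwargs)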
Example #7
 def test_ignore_cgi_parameter(self):
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=5)
     s.ignore_cgi('foo') 
     url = httpbin('get?foo=1')
     r = s.get(url)
     self.assertFalse(r.from_cache)
     r = s.get(url)
     self.assertTrue(r.from_cache)
     url = httpbin('get')
     r = s.get(url)
     self.assertTrue(r.from_cache)
Example #8
def url_get(url: str) -> Response:
    """
    Download a URL using a cache and return the response object
    :param url:
    :return:
    """

    s: Union[Session, CachedSession]
    if 'file://' in url:
        s = requests.session()
        s.mount('file://', FileAdapter())
    elif 'dir://' in url:
        s = requests.session()
        s.mount('dir://', DirAdapter())
    else:
        retry = Retry(total=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        s = CachedSession(
            cache_name="pyff_cache",
            backend=config.request_cache_backend,
            expire_after=config.request_cache_time,
            old_data_on_error=True,
        )
        s.mount('http://', adapter)
        s.mount('https://', adapter)

    headers = {'User-Agent': "pyFF/{}".format(__version__), 'Accept': '*/*'}
    _etag = None
    if _etag is not None:
        headers['If-None-Match'] = _etag
    try:
        r = s.get(url,
                  headers=headers,
                  verify=False,
                  timeout=config.request_timeout)
    except IOError as ex:
        s = requests.Session()
        r = s.get(url,
                  headers=headers,
                  verify=False,
                  timeout=config.request_timeout)

    if six.PY2:
        r.encoding = "utf-8"

    log.debug("url_get({}) returns {} chrs encoded as {}".format(
        url, len(r.content), r.encoding))

    if config.request_override_encoding is not None:
        r.encoding = config.request_override_encoding

    return r
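Relative to Example #1, this later pyFF variant adds retry-mounted adapters and a dir:// scheme. The extra imports it assumes are roughly the following (DirAdapter, like FileAdapter, is a pyFF/requests-file-style helper, so treat the exact module paths as assumptions):

from typing import Union
from requests import Session
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry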
Example #9
 def test_expire_cache(self):
     delay = 1
     url = httpbin('delay/%s' % delay)
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
     time.sleep(0.5)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
Example #11
    def test_ignore_parameters(self):
        url = httpbin("get")
        ignored_param = "ignored"
        usual_param = "some"
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, ignored_parameters=[ignored_param])

        params = {ignored_param: "1", usual_param: "1"}
        s.get(url, params=params)
        self.assertTrue(s.get(url, params=params).from_cache)

        params[ignored_param] = "new"
        self.assertTrue(s.get(url, params=params).from_cache)

        params[usual_param] = "new"
        self.assertFalse(s.get(url, params=params).from_cache)
Example #12
def imdb_search(query):
    session = CachedSession(expire_after=60 * 60 * 24)
    stripped_query = "".join([x for x in query
                              if x.isalnum() or x == " "]).lower()
    r = session.get("https://www.imdb.com/find",
                    params={
                        "q": stripped_query,
                        "s": "tt",
                        "ttype": "ft"
                    })
    page_html = r.text
    soup = BeautifulSoup(page_html, "html.parser")
    results = []
    for result in soup.select(".result_text"):
        title = result.a.get_text()
        try:
            imdb_id = result.a.get("href").split("/title/")[1].split("/")[0]
        except IndexError:
            continue
        result.a.decompose()
        try:
            year = result.get_text().split("(")[1].split(")")[0]
            int(year)
        except (IndexError, ValueError):
            continue

        results.append((title, year, imdb_id))

    return results
Example #13
    def __new__(self, course_id: int, session: CachedSession) -> Tuple[Dict]:
        lesson_list_at_somewhere_response: CachedResponse = session.get(
            f"https://foxford.ru/api/courses/{course_id}/lessons",
            headers={"X-Requested-With": "XMLHttpRequest"})

        if lesson_list_at_somewhere_response.status_code != 200:
            return {"fatal_error": "Lesson list fetch has failed"}

        if not {"lessons", "cursors"}.issubset(
                set(lesson_list_at_somewhere_response.json())):
            return {"fatal_error": "Lesson list structure is unknown"}

        if "id" not in lesson_list_at_somewhere_response.json()["lessons"][0]:
            return {"fatal_error": "Lesson structure is unknown"}

        self.course_id = course_id
        self.session = session

        return pipe(
            lambda json:
            (*self.recursive_collection(self, "before", json["cursors"][
                "before"]), *json["lessons"], *self.recursive_collection(
                    self, "after", json["cursors"]["after"])),
            lambda lessons: map(
                lambda lesson: self.lesson_extension(self, lesson), lessons),
            tuple)(lesson_list_at_somewhere_response.json())
Example #14
    def session_obj(self):
        session = CachedSession(allowable_methods=('GET', 'POST'),
                                ignored_parameters=['smpbss'])

        if not IceDaily.RECAPTCHA:
            with session.cache_disabled():
                response = session.get(
                    url=
                    'https://www.theice.com/marketdata/reports/datawarehouse/ConsolidatedEndOfDayReportPDF.shtml',
                    headers={
                        'User-Agent': 'Mozilla/5.0',
                        'X-Requested-With': 'XMLHttpRequest'
                    },
                    params={
                        'selectionForm': '',
                        'exchangeCode': 'IFEU',
                        'optionRequest': self.flavor['optionRequest']
                    })

            bs = BeautifulSoup(response.text, 'html.parser')

            df = pd.DataFrame([(opt['value'], opt.text)
                               for opt in bs.find_all('option')],
                              columns=["WebActiveCode", "ActiveName"])

            df['ActiveCode'] = df.WebActiveCode.apply(
                lambda s: s.split('|', 1)[1] if '|' in s else None)
            df = df.dropna(how='any')

            self.update_actives(df)

        return session
Example #15
def url_get(url):
    """
    Download a URL using a cache and return the response object
    :param url:
    :return:
    """
    s = None
    info = dict()

    log.debug("GET URL {!s}".format(url))

    if 'file://' in url:
        s = requests.session()
        s.mount('file://', FileAdapter())
    else:
        s = CachedSession(cache_name="pyff_cache",
                          backend=config.request_cache_backend,
                          expire_after=config.request_cache_time,
                          old_data_on_error=True)
    headers = {'User-Agent': "pyFF/{}".format(__version__), 'Accept': '*/*'}
    r = s.get(url,
              headers=headers,
              verify=False,
              timeout=config.request_timeout)
    if config.request_override_encoding is not None:
        r.encoding = config.request_override_encoding

    return r
Example #16
 def wrapper(self, *args, **kwargs):
     if not self.omdb_json:
         session = CachedSession(expire_after=60*60*24*7)
         if self.imdb_id:
             r = session.get("http://www.omdbapi.com/", params={
                 "apiKey": config.OMDB_API_KEY,
                 "i": self.imdb_id
             })
             self.omdb_json = r.json()
         else:
             r = session.get("http://www.omdbapi.com/", params={
                 "apiKey": config.OMDB_API_KEY,
                 "t": self.title,
                 "y": self.year
             })
             self.omdb_json = r.json()
     return f(self, self.omdb_json, *args, **kwargs)
Example #17
    def test_basic_response_config(self):
        """
        Here is how to load something into a session cache with requests_cache and responses
        """
        session = CachedSession(backend='memory', expire_after=100000)

        url = 'http://example.com/123'
        data = {'foo': 'yes please'}
        responses.add(responses.GET, url, json=data)
        response_one = session.get(url)
        self.assertFalse(response_one.from_cache)
        responses.reset()
        responses.add(responses.GET, url, json={})

        response_two = session.get(url)
        self.assertTrue(response_two.from_cache)
        self.assertEqual(response_two.json()['foo'], data['foo'])
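Outside a test class that activates the responses mock for you, responses.add() only takes effect inside an activation scope. A standalone sketch of the same cache-preloading trick (the URL is a placeholder):

import responses
from requests_cache import CachedSession

@responses.activate
def demo():
    url = 'http://example.com/123'
    responses.add(responses.GET, url, json={'foo': 'yes please'})
    session = CachedSession(backend='memory', expire_after=100000)
    assert not session.get(url).from_cache   # first hit populates the cache
    assert session.get(url).from_cache       # second hit is served from it

demo()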
Example #18
def main():
    session = CachedSession('example_cache', backend='sqlite')

    # The real request will only be made once; afterward, the cached response is used
    for i in range(5):
        response = session.get('http://httpbin.org/get')

    # This is more obvious when calling a slow endpoint
    for i in range(5):
        response = session.get('http://httpbin.org/delay/2')

    # Caching can be disabled if we want to get a fresh page and not cache it
    with session.cache_disabled():
        print(session.get('http://httpbin.org/ip').text)

    # Get some debugging info about the cache
    print(session.cache)
    print('Cached URLS:', session.cache.urls)
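The "only once" behavior in main() can be verified directly via the from_cache attribute that requests-cache adds to responses (a quick sketch on a cold cache):

from requests_cache import CachedSession

session = CachedSession('example_cache', backend='sqlite')
first = session.get('http://httpbin.org/get')
second = session.get('http://httpbin.org/get')
print(first.from_cache, second.from_cache)  # expected: False True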
Example #19
 def wrapper(page_html=None, imdb_id=None, soup=None, *args, **kwargs):
     if not (page_html or imdb_id or soup):
         return None
     if imdb_id:
         session = CachedSession(expire_after=60 * 60 * 24)
         r = session.get("https://www.imdb.com/title/{}".format(imdb_id))
         page_html = r.text
     soup = soup or BeautifulSoup(page_html, "html.parser")
     return f(soup, *args, **kwargs)
Example #20
def get_data(leaderboard, year):
    lb_path = LEADERBOARDS.get(leaderboard, leaderboard)
    url = f"https://adventofcode.com/{year}/leaderboard/private/view/{lb_path}"
    cache_options = {"backend": "filesystem", "expire_after": 86400}
    session = CachedSession("../../site_cache", **cache_options)
    cookies = dict([Path("../.session-cookie").read_text().strip().split("=")])
    response = session.get(url, cookies=cookies)
    if response.status_code != 200:
        pdb.set_trace()
    return response.json()
Example #21
def get_page_content(url: str) -> str:
    """
    Utility function to fetch a page and return its prettified HTML content
    :param url: string
    :return: string
    """
    session = CachedSession()
    page = session.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    content = soup.prettify()
    return content
Example #22
    def test_ignore_parameters_get(self):
        url = httpbin("get")
        ignored_param = "ignored"
        usual_param = "some"
        params = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          ignored_parameters=[ignored_param])

        r = s.get(url, params=params)
        self.assertIn(ignored_param, r.json()['args'].keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.get(url, params=params).from_cache)

        params[ignored_param] = "new"
        self.assertTrue(s.get(url, params=params).from_cache)

        params[usual_param] = "new"
        self.assertFalse(s.get(url, params=params).from_cache)
Example #23
 def fetch_df(url):
     r = requests.get(url)
     js = r.json()
     if "features" not in js:
         s = CachedSession()
         with s.cache_disabled():
             r = s.get(url)
             js = r.json()
     features = [feature["attributes"] for feature in js["features"]]
     df = pd.DataFrame(features)
     return df
Example #25
    def get(self, resource: str, params=None):
        client = BackendApplicationClient(client_id=self.client_id)
        oauth = OAuth2Session(client=client)
        token = oauth.fetch_token(token_url='https://us.battle.net/oauth/token', client_id=self.client_id, client_secret=self.client_secret)

        if params is None:
            params = {}
        root = 'https://us.api.blizzard.com/'
        params.update({'locale': 'en_US', 'namespace': 'static-us', 'access_token': token['access_token']})

        session = CachedSession(expire_after=timedelta(hours=1))
        return session.get(root + resource, params=params).json()
Example #26
    def test_loader_with_session(self):
        session = CachedSession(backend='memory', expire_after=100000)
        loadurl = CachableDocumentLoader(use_cache=True, session=session)
        assertion_data = json.loads(test_components['2_0_basic_assertion'])
        context_url = assertion_data['@context']

        set_up_context_mock()
        session.get(context_url)  # precache response
        responses.reset()
        responses.add(responses.GET, context_url, json={'nothing': 'happenin'})

        first_compacted = jsonld.compact(assertion_data,
                                         context_url,
                                         options={'documentLoader': loadurl})
        second_compacted = jsonld.compact(assertion_data,
                                          context_url,
                                          options={'documentLoader': loadurl})

        # second compaction should have built from the cache
        self.assertEqual(first_compacted['verification']['type'],
                         second_compacted['verification']['type'])
Example #27
class AstronomyPictureOfThedaySkill(MycroftSkill):
    def __init__(self):
        super(AstronomyPictureOfThedaySkill,
              self).__init__(name="AstronomyPictureOfTheday")
        if "nasa_key" not in self.settings:
            self.settings["nasa_key"] = "DEMO_KEY"
        _expire_after = timedelta(hours=1)
        self._session = CachedSession(backend='memory',
                                      expire_after=_expire_after)

    def update_picture(self):
        try:
            apod_url = "https://api.nasa.gov/planetary/apod?api_key=" + self.settings[
                "nasa_key"]
            response = self._session.get(apod_url).json()
            title = response["title"]
            url = response["url"]
            summary = response["explanation"]
            if not self.lang.lower().startswith("en"):
                summary = translate(summary, self.lang)
                title = translate(title, self.lang)

            self.settings['imgLink'] = url
            self.settings['title'] = title
            self.settings['summary'] = summary
        except Exception as e:
            self.log.exception(e)
        self.gui['imgLink'] = self.settings['imgLink']
        self.gui['title'] = self.settings['title']
        self.gui['summary'] = self.settings['summary']
        self.set_context("APOD")

    @resting_screen_handler("APOD")
    def idle(self, message):
        self.update_picture()
        self.gui.clear()
        self.gui.show_page('idle.qml')

    @intent_file_handler('apod.intent')
    def handle_apod(self, message):
        self.update_picture()
        self.gui.clear()
        self.gui.show_image(self.settings['imgLink'],
                            caption=self.settings['title'],
                            fill='PreserveAspectFit')

        self.speak(self.settings['title'])

    @intent_handler(
        IntentBuilder("ExplainIntent").require("ExplainKeyword").require(
            "APOD"))
    def handle_explain(self, message):
        self.speak(self.settings['summary'])
Example #28
def get_csrf_token(session: CachedSession) -> str:
    csrf_token_get_response: CachedResponse = session.get(
        "https://foxford.ru/api/csrf_token",
        headers={"X-Requested-With": "XMLHttpRequest"})

    if csrf_token_get_response.status_code != 200:
        return {"fatal_error": "CSRF token fetch has failed"}

    if "token" not in csrf_token_get_response.json():
        return {"fatal_error": "CSRF token structure is unknown"}

    return csrf_token_get_response.json()["token"]
Example #29
def main():
    session = CachedSession('example_cache', backend='sqlite')

    # By default, cached responses never expire
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    assert not response.expires

    # We can set default expiration for the session using expire_after
    session = CachedSession('example_cache', backend='sqlite', expire_after=60)
    session.cache.clear()
    response = session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    print('Expiration time:', response.expires)

    # This can also be overridden for individual requests
    session.cache.clear()
    response = session.get('https://httpbin.org/get', expire_after=1)
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    print('Expiration time:', response.expires)

    # After 1 second, the cached value will have expired
    time.sleep(1.2)
    assert response.is_expired
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache
Example #31
 def wrapper(title, year, *args, **kwargs):
     session = CachedSession(expire_after=60 * 60 * 24)
     # https://www.rottentomatoes.com/napi/search/?query=parasite&offset=0&limit=10
     stripped_title = "".join([x for x in title
                               if x.isalnum() or x == " "]).lower().strip()
     response = session.get("https://www.rottentomatoes.com/napi/search",
                            params={
                                "query": stripped_title,
                                "offset": 0,
                                "limit": 10
                            })
     json = response.json()
     return f(stripped_title, year, json, *args, **kwargs)
Example #32
def get_profile(request, provider_name):
    provider = SOCIAL_AUTH_PROVIDERS[provider_name]()

    # Prepare the session for fetching the token.
    session = OAuth2Session(
        client_id=provider.client_id,
        scope=provider.scope,
        state=request.session.get('social_login_state', ''),
        redirect_uri=request.build_absolute_uri(
            reverse('social_login_callback',
                    kwargs={'provider_name': provider_name})),
    )

    # Clear the session state data before continuing.
    if 'social_login_state' in request.session:
        del request.session['social_login_state']

    # Go and fetch the oauth token.
    token = session.fetch_token(
        token_url=provider.token_uri,
        client_secret=provider.client_secret,
        authorization_response=request.build_absolute_uri())

    # Get the id_token from the oauth token.
    unparsed_id_token = token['id_token']

    # Retrieve the certificates from the provider, this is in json format and is cached for 1 hour.
    expire_after = datetime.timedelta(hours=1)
    cached_session = CachedSession(backend='memory', expire_after=expire_after)
    provider_certificates = cached_session.get(
        provider.jwks_uri).json().get('keys')

    # Put the certificates in a dict, with the identifier as key.
    certificate_set = {cert['kid']: cert for cert in provider_certificates}

    # Look up which certificate was used to sign the id_token.
    kid = jwt.get_unverified_header(unparsed_id_token).get('kid')

    # Convert the certificate from json to something the jwt library can use.
    certificate = RSAAlgorithm.from_jwk(json.dumps(certificate_set[kid]))

    # Now finally do the actual decoding and verifying.
    id_token = jwt.decode(unparsed_id_token,
                          certificate,
                          audience=provider.client_id)

    # Put the parsed id_token in the response.
    token['id_token'] = id_token

    return provider.parse_profile(session, token)
Example #33
def get_entry_url(url: str) -> list:
    """
    Function to extract entry URLs from an alphabet page URL
    :param url: string
    :return: list of urls
    """
    url_list = []
    session = CachedSession()
    page = session.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    entry_url = soup.find_all("li", class_="list-group-item col-md-4")
    for i in tqdm(entry_url):
        url = "http://devri.bzh" + i.a["href"]
        url_list.append(url)
    return url_list
Example #34
    def test_headers_in_get_query(self):
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, include_get_headers=True)
        headers = {"Accept": "text/json"}
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["Accept"] = "text/xml"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["X-custom-header"] = "custom"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        self.assertFalse(s.get(url).from_cache)
        self.assertTrue(s.get(url).from_cache)
Example #35
def api(url, alternate_domain="", cache: CachedSession = None):
    log("Calling " + url)
    if cache:
        data = cache.get(url)
    else:
        data = requests.api.get(url)

    if data.status_code != 200:
        log("Calling " + url)
        url = url.replace(alternate_domain.split("=")[0], alternate_domain.split("=")[1])

        if cache:
            data = cache.get(url)
        else:
            data = requests.api.get(url)

        if data.status_code != 200:
            log("Call failed " + str(data.status_code) + " " + data.text)
            return None

    try:
        return data.json()
    except ValueError:
        return data.text
Example #37
def api_get(endpoint, query, cache=False):
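    # Note: this branching likely assumes requests_cache.install_cache() was
    # applied globally elsewhere, so the plain requests.get() in the else
    # branch is the cached path, while cache_disabled() forces a fresh fetch.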
    if not cache:
        s = CachedSession()
        with s.cache_disabled():
            r = s.get(
                app.config['API_URL'] + endpoint + '?q=' + urllib.parse.quote_plus(json.dumps(query)),
                headers=gen_api_header(),
                verify=app.config['VERIFY_SSL'])
    else:
        r = requests.get(
            app.config['API_URL'] + endpoint + '?q=' + urllib.parse.quote_plus(json.dumps(query)),
            headers=gen_api_header(),
            verify=app.config['VERIFY_SSL'])
    if r.status_code == 200:
        # If created then it returns the object data
        return json.loads(r.text).get('objects')
    else:
        return {}
Example #38
    @mock.patch("requests_cache.backends.base.datetime")
    @mock.patch("requests_cache.core.datetime")
    def test_remove_expired_entries(self, datetime_mock, datetime_mock2):
        expire_after = timedelta(minutes=10)
        start_time = datetime.utcnow().replace(year=2010, minute=0)
        datetime_mock.utcnow.return_value = start_time
        datetime_mock2.utcnow.return_value = start_time

        s = CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=expire_after)
        s.get(httpbin('get'))
        s.get(httpbin('relative-redirect/3'))
        datetime_mock.utcnow.return_value = start_time + expire_after * 2
        datetime_mock2.utcnow.return_value = datetime_mock.utcnow.return_value

        ok_url = 'get?x=1'
        s.get(httpbin(ok_url))
        self.assertEqual(len(s.cache.responses), 3)
        self.assertEqual(len(s.cache.keys_map), 3)
        s.remove_expired_responses()
        self.assertEqual(len(s.cache.responses), 1)
        self.assertEqual(len(s.cache.keys_map), 0)
        self.assertIn(ok_url, list(s.cache.responses.values())[0][0].url)
Example #39
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from requests_cache import CachedSession

cs = CachedSession(allowable_methods=('GET', 'POST'))
cs.cache.clear()
for i in range(2):
    r = cs.get("http://httpbin.org/get?p1=v1", params={'p2': 'v2', 'p3': 'cyrЯЯ'})
    print(r)
    print(r.from_cache)

Example #40
class CacheTestCase(unittest.TestCase):

    def setUp(self):
        self.s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        self.s.cache.clear()
        requests_cache.uninstall_cache()

    def test_expire_cache(self):
        delay = 1
        url = httpbin('delay/%s' % delay)
        s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)
        time.sleep(0.5)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)

    def test_delete_urls(self):
        url = httpbin('relative-redirect/3')
        r = self.s.get(url)
        for i in range(1, 4):
            self.assert_(self.s.cache.has_url(httpbin('relative-redirect/%s' % i)))
        self.s.cache.delete_url(url)
        self.assert_(not self.s.cache.has_url(url))

    def test_unregistered_backend(self):
        with self.assertRaises(ValueError):
            CachedSession(CACHE_NAME, backend='nonexistent')

    def test_hooks(self):
        state = defaultdict(int)
        for hook in ('response',):  # TODO it's only one hook here

            def hook_func(r, *args, **kwargs):
                state[hook] += 1
                return r
            n = 5
            for i in range(n):
                r = self.s.get(httpbin('get'), hooks={hook: hook_func})
            self.assertEqual(state[hook], n)

    def test_attr_from_cache_in_hook(self):
        state = defaultdict(int)
        hook = 'response'

        def hook_func(r, *args, **kwargs):
            if state[hook] > 0:
                self.assert_(r.from_cache, True)
            state[hook] += 1
            return r
        n = 5
        for i in range(n):
            r = self.s.get(httpbin('get'), hooks={hook: hook_func})
        self.assertEqual(state[hook], n)

    def test_post(self):
        url = httpbin('post')
        r1 = json.loads(self.s.post(url, data={'test1': 'test1'}).text)
        r2 = json.loads(self.s.post(url, data={'test2': 'test2'}).text)
        self.assertIn('test2', r2['form'])
        req = Request('POST', url).prepare()
        self.assert_(not self.s.cache.has_key(self.s.cache.create_key(req)))

    def test_disabled(self):

        url = httpbin('get')
        requests_cache.install_cache(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        requests.get(url)
        with requests_cache.disabled():
            for i in range(2):
                r = requests.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        with self.s.cache_disabled():
            for i in range(2):
                r = self.s.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        r = self.s.get(url)
        self.assertTrue(getattr(r, 'from_cache', False))

    def test_enabled(self):
        url = httpbin('get')
        options = dict(cache_name=CACHE_NAME, backend=CACHE_BACKEND,
                       fast_save=FAST_SAVE)
        with requests_cache.enabled(**options):
            r = requests.get(url)
            self.assertFalse(getattr(r, 'from_cache', False))
            for i in range(2):
                r = requests.get(url)
                self.assertTrue(getattr(r, 'from_cache', False))
        r = requests.get(url)
        self.assertFalse(getattr(r, 'from_cache', False))

    def test_content_and_cookies(self):
        requests_cache.install_cache(CACHE_NAME, CACHE_BACKEND)
        s = requests.session()
        def js(url):
            return json.loads(s.get(url).text)
        r1 = js(httpbin('cookies/set/test1/test2'))
        with requests_cache.disabled():
            r2 = js(httpbin('cookies'))
        self.assertEqual(r1, r2)
        r3 = js(httpbin('cookies'))
        with requests_cache.disabled():
            r4 = js(httpbin('cookies/set/test3/test4'))
        # from cache
        self.assertEqual(r3, js(httpbin('cookies')))
        # updated
        with requests_cache.disabled():
            self.assertEqual(r4, js(httpbin('cookies')))

    def test_response_history(self):
        r1 = self.s.get(httpbin('relative-redirect/3'))
        def test_redirect_history(url):
            r2 = self.s.get(url)
            self.assertTrue(r2.from_cache)
            for r11, r22 in zip(r1.history, r2.history):
                self.assertEqual(r11.url, r22.url)
        test_redirect_history(httpbin('relative-redirect/3'))
        test_redirect_history(httpbin('relative-redirect/2'))
        r3 = requests.get(httpbin('relative-redirect/1'))
        self.assertEqual(len(r3.history), 1)

    def test_response_history_simple(self):
        r1 = self.s.get(httpbin('relative-redirect/2'))
        r2 = self.s.get(httpbin('relative-redirect/1'))
        self.assertTrue(r2.from_cache)

    def post(self, data):
        return json.loads(self.s.post(httpbin('post'), data=data).text)

    def test_post_params(self):
        # issue #2
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))

        d = {'param1': 'test1'}
        for _ in range(2):
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)

        self.assertTrue(self.s.post(httpbin('post'), data=d).from_cache)
        d.update({'something': 'else'})
        self.assertFalse(self.s.post(httpbin('post'), data=d).from_cache)

    def test_post_data(self):
        # issue #2, raw payload
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))
        d1 = json.dumps({'param1': 'test1'})
        d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
        d3 = str('some unicode data')
        if is_py3:
            bin_data = bytes('some binary data', 'utf8')
        else:
            bin_data = bytes('some binary data')

        for d in (d1, d2, d3):
            self.assertEqual(self.post(d)['data'], d)
            r = self.s.post(httpbin('post'), data=d)
            self.assert_(hasattr(r, 'from_cache'))

        self.assertEqual(self.post(bin_data)['data'],
                         bin_data.decode('utf8'))
        r = self.s.post(httpbin('post'), data=bin_data)
        self.assert_(hasattr(r, 'from_cache'))

    def test_get_params_as_argument(self):
        for _ in range(5):
            p = {'arg1': 'value1'}
            r = self.s.get(httpbin('get'), params=p)
            self.assert_(self.s.cache.has_url( httpbin('get?arg1=value1')))

    def test_https_support(self):
        n = 10
        delay = 1
        url = 'https://httpbin.org/delay/%s?ar1=value1' % delay
        t = time.time()
        for _ in range(n):
            r = self.s.get(url, verify=False)
        self.assertLessEqual(time.time() - t, delay * n / 2)

    def test_from_cache_attribute(self):
        url = httpbin('get?q=1')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)
        self.s.cache.clear()
        self.assertFalse(self.s.get(url).from_cache)

    def test_gzip_response(self):
        url = httpbin('gzip')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)

    def test_close_response(self):
        for _ in range(3):
            r = self.s.get(httpbin("get"))
            r.close()

    def test_get_parameters_normalization(self):
        url = httpbin("get")
        params = {"a": "a", "b": ["1", "2", "3"], "c": "4"}

        self.assertFalse(self.s.get(url, params=params).from_cache)
        r = self.s.get(url, params=params)
        self.assertTrue(r.from_cache)
        self.assertEquals(r.json()["args"], params)
        self.assertFalse(self.s.get(url, params={"a": "b"}).from_cache)
        self.assertTrue(self.s.get(url, params=sorted(params.items())).from_cache)

        class UserSubclass(dict):
            def items(self):
                return sorted(super(UserSubclass, self).items(), reverse=True)

        params["z"] = "5"
        custom_dict = UserSubclass(params)
        self.assertFalse(self.s.get(url, params=custom_dict).from_cache)
        self.assertTrue(self.s.get(url, params=custom_dict).from_cache)

    def test_post_parameters_normalization(self):
        params = {"a": "a", "b": ["1", "2", "3"], "c": "4"}
        url = httpbin("post")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('GET', 'POST'))
        self.assertFalse(s.post(url, data=params).from_cache)
        self.assertTrue(s.post(url, data=params).from_cache)
        self.assertTrue(s.post(url, data=sorted(params.items())).from_cache)
        self.assertFalse(s.post(url, data=sorted(params.items(), reverse=True)).from_cache)

    def test_stream_requests_support(self):
        n = 100
        url = httpbin("stream/%s" % n)
        r = self.s.get(url, stream=True)
        lines = list(r.iter_lines())
        self.assertEquals(len(lines), n)

        for i in range(2):
            r = self.s.get(url, stream=True)
            self.assertTrue(r.from_cache)
            cached_lines = list(r.iter_lines())
            self.assertEquals(cached_lines, lines)

    def test_headers_in_get_query(self):
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, include_get_headers=True)
        headers = {"Accept": "text/json"}
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["Accept"] = "text/xml"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["X-custom-header"] = "custom"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        self.assertFalse(s.get(url).from_cache)
        self.assertTrue(s.get(url).from_cache)

    def test_str_and_repr(self):
        s = repr(CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=10))
        self.assertIn(CACHE_NAME, s)
        self.assertIn("10", s)

    @mock.patch("requests_cache.core.datetime")
    def test_return_old_data_on_error(self, datetime_mock):
        datetime_mock.utcnow.return_value = datetime.utcnow()
        expire_after = 100
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=True, expire_after=expire_after)
        header = "X-Tst"

        def get(n):
            return s.get(url, headers={header: n}).json()["headers"][header]

        get("expired")
        self.assertEquals(get("2"), "expired")
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)

        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            self.assertEquals(get("3"), "expired")

        with mock.patch("requests_cache.core.OriginalSession.send") as send_mock:
            resp_mock = requests.Response()
            request = requests.Request("GET", url)
            resp_mock.request = request.prepare()
            resp_mock.status_code = 400
            resp_mock._content = '{"other": "content"}'
            send_mock.return_value = resp_mock
            self.assertEquals(get("3"), "expired")

            resp_mock.status_code = 200
            self.assertIs(s.get(url).content, resp_mock.content)

        # default behaviour
        datetime_mock.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=False, expire_after=100)
        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            with self.assertRaises(Exception):
                s.get(url)

    def test_ignore_parameters_get(self):
        url = httpbin("get")
        ignored_param = "ignored"
        usual_param = "some"
        params = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          ignored_parameters=[ignored_param])

        r = s.get(url, params=params)
        self.assertIn(ignored_param, r.json()['args'].keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.get(url, params=params).from_cache)

        params[ignored_param] = "new"
        self.assertTrue(s.get(url, params=params).from_cache)

        params[usual_param] = "new"
        self.assertFalse(s.get(url, params=params).from_cache)

    def test_ignore_parameters_post(self):
        url = httpbin("post")
        ignored_param = "ignored"
        usual_param = "some"
        d = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        r = s.post(url, data=d)
        self.assertIn(ignored_param, r.json()['form'].keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.post(url, data=d).from_cache)

        d[ignored_param] = "new"
        self.assertTrue(s.post(url, data=d).from_cache)

        d[usual_param] = "new"
        self.assertFalse(s.post(url, data=d).from_cache)

    def test_ignore_parameters_post_json(self):
        url = httpbin("post")
        ignored_param = "ignored"
        usual_param = "some"
        d = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        r = s.post(url, json=d)
        self.assertIn(ignored_param, json.loads(r.json()['data']).keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.post(url, json=d).from_cache)

        d[ignored_param] = "new"
        self.assertTrue(s.post(url, json=d).from_cache)

        d[usual_param] = "new"
        self.assertFalse(s.post(url, json=d).from_cache)

    def test_ignore_parameters_post_raw(self):
        url = httpbin("post")
        ignored_param = "ignored"
        raw_data = "raw test data"

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        self.assertFalse(s.post(url, data=raw_data).from_cache)
        self.assertTrue(s.post(url, data=raw_data).from_cache)

        raw_data = "new raw data"
        self.assertFalse(s.post(url, data=raw_data).from_cache)

    @mock.patch("requests_cache.backends.base.datetime")
    @mock.patch("requests_cache.core.datetime")
    def test_remove_expired_entries(self, datetime_mock, datetime_mock2):
        expire_after = timedelta(minutes=10)
        start_time = datetime.utcnow().replace(year=2010, minute=0)
        datetime_mock.utcnow.return_value = start_time
        datetime_mock2.utcnow.return_value = start_time

        s = CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=expire_after)
        s.get(httpbin('get'))
        s.get(httpbin('relative-redirect/3'))
        datetime_mock.utcnow.return_value = start_time + expire_after * 2
        datetime_mock2.utcnow.return_value = datetime_mock.utcnow.return_value

        ok_url = 'get?x=1'
        s.get(httpbin(ok_url))
        self.assertEqual(len(s.cache.responses), 3)
        self.assertEqual(len(s.cache.keys_map), 3)
        s.remove_expired_responses()
        self.assertEqual(len(s.cache.responses), 1)
        self.assertEqual(len(s.cache.keys_map), 0)
        self.assertIn(ok_url, list(s.cache.responses.values())[0][0].url)
Example #41
class DownloadExternalResource(object):
    def __init__(self,
                 url=None,
                 max_file_size=None,
                 allowed_content_types=(),
                 jsfetcher=None,
                 marker=r'<meta.*name="Generator".*>'):
        logger.info('Running downloader for url: %s' % url)
        self.url = str(url)
        self.max_file_size = max_file_size or settings.RFDOCS.get('MAX_FILE_SIZE')
        self.allowed_content_types = allowed_content_types or settings.RFDOCS.get('ALLOWED_CONTENT_TYPES')

        self.s = CachedSession()
        self.r = None
        self.error = {}
        self.jsfetcher = jsfetcher
        self.marker = marker

    def _set_error(self, key, value):
        self.error[key] = value
        return self.error

    def _no_error(self):
        self.error = {}
        return self.error

    def send_request(self):
        logger.info('Download external resource: %s' % self.url)
        try:
            self.r = self.s.get(self.url)
        except (requests.exceptions.MissingSchema,
                requests.exceptions.InvalidSchema,
                requests.exceptions.ConnectionError,) as error:
            logger.warn("Failed to download resource. Error: %s" % error)
            self._set_error('error', error)

    def get_response(self):
        # Workaround for Robot Framework libraries with version >= 2.8 (or even 2.7):
        # those libraries are generated with the jQuery templates system, and
        # Python's `requests` module fetches raw content, not rendered HTML,
        # so the parser would fail on them. PhantomJS is used instead.
        # `requests_cache` cannot help here, because PhantomJS has to request
        # the URL with its own methods in order to render the HTML.
        # PhantomJS's native `fs` module is deliberately not used to write
        # data to the filesystem; instead `subprocess.Popen` executes the
        # JavaScript, lets PhantomJS do its job, and hands the output back
        # to Python.

        # The resource has already passed Django's validators before we get here.

        # First, check whether the response content has a meta tag with
        # name="Generator". If so, the document uses the jQuery templates
        # plugin system and we need PhantomJS's help.
        content = self.r.content
        mo = re.search(self.marker, content, re.DOTALL | re.M | re.I)
        if not mo:
            return content
        logger.info('Using phantomjs to download resource')
        alternate_downloader = PhantomJSHelper(url=self.url, error_callback=self._set_error)
        return alternate_downloader.get_content()

    def validate_response(self):
        logger.info('Validate external resource: %s' % self.url)
        if not self.r:
            self.send_request()
        if self.error:
            return self.error
        try:
            self.r.raise_for_status()
        except requests.exceptions.HTTPError as error:
            logger.warn("Failed to fetch resource. Error: %s" % error)
            return self._set_error('error', error)
        if self.r.status_code == requests.codes.ok:
            content_len = self.r.headers.get('content-length', None)
            if content_len:
                csize = int(content_len)
            else:
                csize = len(self.r.content)
            if csize < self.max_file_size:
                ctype_header = self.r.headers.get('content-type')
                if not ctype_header:
                    return self._set_error('content_type',
                                           'Response does not contain the \'Content-Type\' header. Rejected.')
                ctype = ctype_header.split(';')[0].lower()
                if ctype in [ct.lower() for ct in self.allowed_content_types]:
                    # the place where all the procedure passed and we succeeded
                    return self._no_error()
                else:
                    logger.warn("Failed to fetch resource. "
                                "Allowed content types are: %s."
                                "The content type is: %s" % (', '.join(self.allowed_content_types), ctype,))
                    return self._set_error('content_type', ctype)

            else:
                logger.warn("Failed to fetch resource. "
                            "The content size \'%s\' exceeds maximum allowed size: %s" % (self.max_file_size, csize))
                return self._set_error('content_size', csize)
        else:
            error = requests.exceptions.HTTPError(self.r.status_code)
            logger.warn("Failed to fetch resource. Error: %s" % error)
            return self._set_error('error', error)

    def get_response_from_cache(self):
        if not self.s.cache.has_url(self.url):
            self.send_request()
            self.validate_response()
            return self.get_response()
        self.r = self.s.get(self.url)
        return self.get_response()

    def get_response_from_cache_or_raise_error(self):
        response = self.get_response_from_cache()
        if self.error:
            err = self.error.get('error')
            # these are model's `clean` ValidationError (not the same as forms.ValidationError)
            if err:
                raise exceptions.ValidationError(err)
            else:
                raise exceptions.ValidationError(self.error)
        return response
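The get_response() comments above describe shelling out to PhantomJS rather than using its fs module. A minimal sketch of that subprocess pattern (the render.js script name and CLI arguments are hypothetical):

import subprocess

def render_with_phantomjs(url, script='render.js', timeout=60):
    # Let PhantomJS fetch and render the page, then read the HTML it
    # prints to stdout back into Python.
    proc = subprocess.Popen(
        ['phantomjs', script, url],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    out, err = proc.communicate(timeout=timeout)
    if proc.returncode != 0:
        raise RuntimeError(err.decode('utf-8', 'replace'))
    return out.decode('utf-8', 'replace')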
Example #42
class CacheTestCase(unittest.TestCase):

    def setUp(self):
        self.s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        self.s.cache.clear()
        requests_cache.uninstall_cache()

    def test_expire_cache(self):
        delay = 1
        url = httpbin('delay/%s' % delay)
        s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)
        time.sleep(0.5)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)

    def test_delete_urls(self):
        url = httpbin('redirect/3')
        r = self.s.get(url)
        for i in range(1, 4):
            self.assertTrue(self.s.cache.has_url(httpbin('redirect/%s' % i)))
        self.s.cache.delete_url(url)
        self.assertFalse(self.s.cache.has_url(url))

    def test_unregistered_backend(self):
        with self.assertRaises(ValueError):
            CachedSession(CACHE_NAME, backend='nonexistent')

#    def test_async_compatibility(self):
#        try:
#            import grequests
#        except Exception:
#            self.skipTest('gevent is not installed')
#        n = 3
#        def long_running():
#            t = time.time()
#            rs = [grequests.get(httpbin('delay/%s' % i)) for i in range(n + 1)]
#            grequests.map(rs)
#            return time.time() - t
#        # cache it
#        delta = long_running()
#        self.assertGreaterEqual(delta, n)
#        # fast from cache
#        delta = 0
#        for i in range(n):
#            delta += long_running()
#        self.assertLessEqual(delta, 1)

    def test_hooks(self):
        state = defaultdict(int)
        for hook in ('response',):  # NOTE: 'response' is the only hook requests currently exposes

            def hook_func(r, *args, **kwargs):
                state[hook] += 1
                return r
            n = 5
            for i in range(n):
                r = self.s.get(httpbin('get'), hooks={hook: hook_func})
            self.assertEqual(state[hook], n)

    def test_attr_from_cache_in_hook(self):
        state = defaultdict(int)
        hook = 'response'

        def hook_func(r, *args, **kwargs):
            if state[hook] > 0:
                self.assertTrue(r.from_cache)
            state[hook] += 1
            return r
        n = 5
        for i in range(n):
            r = self.s.get(httpbin('get'), hooks={hook: hook_func})
        self.assertEqual(state[hook], n)
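
The same idea as a standalone sketch: response hooks fire for cached responses as well, and requests_cache marks replayed responses with a from_cache attribute (session and helper names as in the tests above):

def report(r, *args, **kwargs):
    # from_cache is set on responses served from the cache
    origin = 'cache' if getattr(r, 'from_cache', False) else 'network'
    print('%s served from %s' % (r.url, origin))
    return r

s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND)
s.get(httpbin('get'), hooks={'response': report})  # first call: network
s.get(httpbin('get'), hooks={'response': report})  # second call: cache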

    def test_post(self):
        url = httpbin('post')
        r1 = json.loads(self.s.post(url, data={'test1': 'test1'}).text)
        r2 = json.loads(self.s.post(url, data={'test2': 'test2'}).text)
        self.assertIn('test2', r2['form'])
        req = Request('POST', url).prepare()
        self.assertFalse(self.s.cache.has_key(self.s.cache.create_key(req)))

    def test_disabled(self):
        url = httpbin('get')
        requests_cache.install_cache(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        requests.get(url)
        with requests_cache.disabled():
            for i in range(2):
                r = requests.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        with self.s.cache_disabled():
            for i in range(2):
                r = self.s.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        r = self.s.get(url)
        self.assertTrue(getattr(r, 'from_cache', False))

    def test_enabled(self):
        url = httpbin('get')
        options = dict(cache_name=CACHE_NAME, backend=CACHE_BACKEND,
                       fast_save=FAST_SAVE)
        with requests_cache.enabled(**options):
            r = requests.get(url)
            self.assertFalse(getattr(r, 'from_cache', False))
            for i in range(2):
                r = requests.get(url)
                self.assertTrue(getattr(r, 'from_cache', False))
        r = requests.get(url)
        self.assertFalse(getattr(r, 'from_cache', False))
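
Outside the tests, the three patching styles exercised above look like this (a sketch; url and the cache constants are as in the test module):

import requests
import requests_cache

# Global: every requests.get() in the process goes through the cache.
requests_cache.install_cache(CACHE_NAME, backend=CACHE_BACKEND)
requests.get(url)

# Temporarily bypass the installed cache.
with requests_cache.disabled():
    requests.get(url)  # always hits the network

requests_cache.uninstall_cache()

# Enable caching only inside the block.
with requests_cache.enabled(cache_name=CACHE_NAME, backend=CACHE_BACKEND):
    requests.get(url)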

    def test_content_and_cookies(self):
        requests_cache.install_cache(CACHE_NAME, CACHE_BACKEND)
        s = requests.session()
        def js(url):
            return json.loads(s.get(url).text)
        r1 = js(httpbin('cookies/set/test1/test2'))
        with requests_cache.disabled():
            r2 = js(httpbin('cookies'))
        self.assertEqual(r1, r2)
        r3 = js(httpbin('cookies'))
        with requests_cache.disabled():
            r4 = js(httpbin('cookies/set/test3/test4'))
        # from cache
        self.assertEqual(r3, js(httpbin('cookies')))
        # updated
        with requests_cache.disabled():
            self.assertEqual(r4, js(httpbin('cookies')))

    def test_response_history(self):
        r1 = self.s.get(httpbin('redirect/3'))
        def test_redirect_history(url):
            r2 = self.s.get(url)
            self.assertTrue(r2.from_cache)
            for r11, r22 in zip(r1.history, r2.history):
                self.assertEqual(r11.url, r22.url)
        test_redirect_history(httpbin('redirect/3'))
        test_redirect_history(httpbin('redirect/2'))
        r3 = requests.get(httpbin('redirect/1'))
        self.assertEqual(len(r3.history), 1)

    def test_response_history_simple(self):
        r1 = self.s.get(httpbin('redirect/2'))
        r2 = self.s.get(httpbin('redirect/1'))
        self.assertTrue(r2.from_cache)

    def post(self, data):
        return json.loads(self.s.post(httpbin('post'), data=data).text)

    def test_post_params(self):
        # issue #2
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))

        d = {'param1': 'test1'}
        for _ in range(2):
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)

        self.assertTrue(self.s.post(httpbin('post'), data=d).from_cache)
        d.update({'something': 'else'})
        self.assertFalse(self.s.post(httpbin('post'), data=d).from_cache)

    def test_post_data(self):
        # issue #2, raw payload
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))
        d1 = json.dumps({'param1': 'test1'})
        d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
        d3 = str('some unicode data')
        if is_py3:
            bin_data = bytes('some binary data', 'utf8')
        else:
            bin_data = bytes('some binary data')

        for d in (d1, d2, d3):
            self.assertEqual(self.post(d)['data'], d)
            r = self.s.post(httpbin('post'), data=d)
            self.assertTrue(hasattr(r, 'from_cache'))

        self.assertEqual(self.post(bin_data)['data'],
                         bin_data.decode('utf8'))
        r = self.s.post(httpbin('post'), data=bin_data)
        self.assertTrue(hasattr(r, 'from_cache'))
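
In short: POST caching must be opted into via allowable_methods, and the request body becomes part of the cache key, so each distinct payload gets its own entry. A sketch, using the helpers above:

s = CachedSession(CACHE_NAME, CACHE_BACKEND, allowable_methods=('GET', 'POST'))
r1 = s.post(httpbin('post'), data={'a': '1'})  # network hit, stored
r2 = s.post(httpbin('post'), data={'a': '1'})  # same body: cache hit
r3 = s.post(httpbin('post'), data={'a': '2'})  # new body: network hit
assert r2.from_cache and not getattr(r3, 'from_cache', False)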

    def test_get_params_as_argument(self):
        for _ in range(5):
            p = {'arg1': 'value1'}
            r = self.s.get(httpbin('get'), params=p)
            self.assertTrue(self.s.cache.has_url(httpbin('get?arg1=value1')))

    def test_https_support(self):
        n = 10
        delay = 1
        url = 'https://httpbin.org/delay/%s?arg1=value1' % delay
        t = time.time()
        for _ in range(n):
            r = self.s.get(url, verify=False)
        self.assertLessEqual(time.time() - t, delay * n / 2)

    def test_from_cache_attribute(self):
        url = httpbin('get?q=1')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)
        self.s.cache.clear()
        self.assertFalse(self.s.get(url).from_cache)

    def test_gzip_response(self):
        url = httpbin('gzip')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)