Example #1
    def test_custom_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        session = CachedSession(
            fallback_cache_duration=2,
            file_cache_directory=file_cache_directory,
        )

        # with a 2-second retention and 1-second ticks between requests
        # (freezegun's default), the first two requests should share an
        # epoch, whereas the third gets fresh data; the first request is
        # sent at t=0

        with freeze_time("2012-01-14 12:00:01") as freezer:
            response_1 = session.get("https://now.httpbin.org")
            freezer.tick()

            response_2 = session.get("https://now.httpbin.org")
            freezer.tick()

            response_3 = session.get("https://now.httpbin.org")

            self.assertEqual(response_1.text, response_2.text)
            self.assertNotEqual(response_2.text, response_3.text)
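Outside a test, the same fallback heuristic needs only the constructor argument exercised above. A minimal sketch, assuming the import path used elsewhere in these examples (the URL is a placeholder):

from canonicalwebteam.http import CachedSession

# responses without explicit caching headers are reused for up to
# 2 seconds; after that, the session fetches fresh data
session = CachedSession(fallback_cache_duration=2)
response = session.get("https://example.com/api")  # placeholder URL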
Example #2
    def test_default_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        session = CachedSession(file_cache_directory=file_cache_directory)

        with freeze_time("2012-01-14 12:00:01") as freezer:

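            # with the default fallback duration, the response fetched
            # 2 seconds after the first should still come from the cache,
            # while the one fetched 5 seconds after should be fresh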
            response_1 = session.get("https://now.httpbin.org")
            freezer.tick()
            freezer.tick()

            response_2 = session.get("https://now.httpbin.org")
            freezer.tick()
            freezer.tick()
            freezer.tick()

            response_3 = session.get("https://now.httpbin.org")

            self.assertEqual(response_1.text, response_2.text)
            self.assertNotEqual(response_2.text, response_3.text)
Example #3
    def __init__(self, base_url, frontpage_id, session_class=CachedSession):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        @param frontpage_id: The ID of the frontpage topic in Discourse.
                            This topic should also contain the navigation.
        """

        self.base_url = base_url.rstrip("/")
        self.frontpage_id = frontpage_id
        self.session = session_class(expire_after=300)
Example #4
    def test_redis_cache(self):
        class FakeConnectionPool:
            def __init__(self, name):
                self.name = name

        # our mock is used here; passing a connection_pool with a name
        # lets us tell the different Redis mocks apart

        redis_mock_1 = redis.Redis(
            connection_pool=FakeConnectionPool(name="test1")
        )
        redis_mock_2 = redis.Redis(
            connection_pool=FakeConnectionPool(name="test2")
        )

        self.assertNotEqual(redis_mock_1, redis_mock_2)

        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        with freeze_time("2012-01-14 12:00:01") as freezer:
            session_1 = CachedSession(
                redis_connection=redis_mock_1, fallback_cache_duration=500
            )
            session_2 = CachedSession(
                redis_connection=redis_mock_2, fallback_cache_duration=1
            )

            resp_1 = session_1.get("https://now.httpbin.org")
            resp_2 = session_2.get("https://now.httpbin.org")

            self.assertNotEqual(resp_1.text, resp_2.text)

            freezer.tick()

            resp_3 = session_2.get("https://now.httpbin.org")

            self.assertNotEqual(resp_2.text, resp_3.text)

            session_3 = CachedSession(
                redis_connection=redis_mock_1, fallback_cache_duration=1
            )

            resp_4 = session_3.get("https://now.httpbin.org")

            self.assertEqual(resp_1.text, resp_4.text)
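Outside the mocked test, the redis_connection argument takes an ordinary client. A minimal sketch, assuming a reachable Redis instance (the host and port are placeholders):

import redis

from canonicalwebteam.http import CachedSession

# cache entries live in Redis, so several processes can share them
connection = redis.Redis(host="localhost", port=6379)  # placeholder address
session = CachedSession(
    redis_connection=connection, fallback_cache_duration=300
)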
Example #5
    def test_timeout_adapter(self):
        session = CachedSession(
            timeout=2, file_cache_directory=file_cache_directory
        )

        # this test can be inconsistent across concurrent runs, as it
        # relies on real requests to httpbin.org and server-side delays
        with self.assertRaises(
            (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout,
            )
        ):
            session.get("https://httpbin.org/delay/3")

        resp = session.get("https://httpbin.org/delay/1")

        self.assertIsNotNone(resp)
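In application code the same timeout surfaces as a requests exception; ConnectTimeout and ReadTimeout both subclass requests.exceptions.Timeout. A minimal sketch, assuming only the timeout argument shown above (the URL is a placeholder):

import requests

from canonicalwebteam.http import CachedSession

session = CachedSession(timeout=2)

try:
    response = session.get("https://example.com/slow")  # placeholder URL
except requests.exceptions.Timeout:
    response = None  # catches both connect and read timeouts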
Example #6

    def __init__(self,
                 base_url,
                 session=CachedSession(fallback_cache_duration=300)):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        """

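        # note: the default session is created once at import time, so every
        # instance constructed without an explicit session shares its cache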
        self.base_url = base_url.rstrip("/")
        self.session = session
Example #7
    def test_file_cache(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        cache_dir_1 = ".test1"
        cache_dir_2 = ".test2"

        session_1 = CachedSession(
            file_cache_directory=cache_dir_1, fallback_cache_duration=2000
        )
        session_2 = CachedSession(file_cache_directory=cache_dir_2)

        resp_1 = session_1.get("https://now.httpbin.org")

        self.assertTrue(os.path.isdir(cache_dir_1))

        resp_2 = session_2.get("https://now.httpbin.org")

        self.assertTrue(os.path.isdir(cache_dir_2))
        self.assertNotEqual(resp_1.text, resp_2.text)

        shutil.rmtree(cache_dir_2)

        self.assertFalse(os.path.isdir(cache_dir_2))

        resp_3 = session_2.get("https://now.httpbin.org")

        self.assertTrue(os.path.isdir(cache_dir_2))
        self.assertNotEqual(resp_2.text, resp_3.text)

        session_3 = CachedSession(
            file_cache_directory=cache_dir_1, fallback_cache_duration=2000
        )

        resp_4 = session_3.get("https://now.httpbin.org")

        self.assertEqual(resp_1.text, resp_4.text)

        shutil.rmtree(cache_dir_1)
        shutil.rmtree(cache_dir_2)
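The behaviour under test, restated as a minimal sketch: sessions pointed at the same directory share one on-disk cache (the directory name is arbitrary):

from canonicalwebteam.http import CachedSession

session_a = CachedSession(file_cache_directory=".webcache")
session_b = CachedSession(file_cache_directory=".webcache")

# a URL fetched through session_a is served from the shared on-disk
# cache when session_b requests it, until the cached entry expires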
Example #8
    def test_cache_control_no_cache_overwrites_custom_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET,
            "https://now.httpbin.org",
            body=request_callback,
            adding_headers={"Cache-Control": "no-cache"},
        )
        session = CachedSession(file_cache_directory=file_cache_directory)

        # with no-cache set, no response should be cached,
        # so every request returns a different body
        response_1 = session.get("https://now.httpbin.org")
        response_2 = session.get("https://now.httpbin.org")
        response_3 = session.get("https://now.httpbin.org")

        self.assertNotEqual(response_1.text, response_2.text)
        self.assertNotEqual(response_2.text, response_3.text)
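For context, the server side of this interaction could look like the sketch below; Flask is used purely for illustration and is not part of the library under test:

import time

from flask import Flask, jsonify

app = Flask(__name__)


@app.route("/now")
def now():
    # "no-cache" tells clients, including CachedSession, to revalidate
    # instead of reusing a stored response, overriding any client-side
    # fallback heuristic
    response = jsonify({"epoch": time.time()})
    response.headers["Cache-Control"] = "no-cache"
    return response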
Example #9
    def test_cache_control_max_age_overwrites_custom_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET,
            "https://now.httpbin.org",
            body=request_callback,
            adding_headers={"Cache-Control": "max-age=2"},
        )

        session = CachedSession(file_cache_directory=file_cache_directory)

        with freeze_time("2012-01-14 12:00:01") as freezer:

            response_1 = session.get("https://now.httpbin.org")
            freezer.tick()
            response_2 = session.get("https://now.httpbin.org")
            freezer.tick()
            response_3 = session.get("https://now.httpbin.org")

            self.assertEqual(response_1.text, response_2.text)
            self.assertNotEqual(response_2.text, response_3.text)
Example #10
def get_search_results(
        api_key,
        query,
        search_engine_id,
        start=None,
        num=None,
        siteSearch=None,
        session=CachedSession(fallback_cache_duration=600),
):
    """
    Query the Google Custom Search API for search results
    """

    response = session.get(
        "https://www.googleapis.com/customsearch/v1",
        params={
            "key": api_key,
            "cx": search_engine_id,
            "q": query,
            "start": start,
            "num": num,
            "siteSearch": siteSearch,
        },
    )

    response.raise_for_status()

    results = response.json()

    if "items" in results:
        # Move "items" to "entries" as "items" is a method name for dicts
        results["entries"] = results.pop("items")

        # Remove newlines from the snippet
        for item in results["entries"]:
            if "htmlSnippet" in item:
                item["htmlSnippet"] = item["htmlSnippet"].replace("<br>\n", "")

    return results
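A hypothetical call; the key and engine ID are placeholders, not real credentials:

results = get_search_results(
    api_key="YOUR_API_KEY",  # placeholder
    query="ubuntu server",
    search_engine_id="YOUR_SEARCH_ENGINE_ID",  # placeholder
)

for entry in results.get("entries", []):
    print(entry.get("htmlSnippet"))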
Example #11
class DiscourseDocs:
    """
    A basic model class for retrieving Documentation content
    from a Discourse installation through the API
    """
    def __init__(self, base_url, frontpage_id, session_class=CachedSession):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        @param frontpage_id: The ID of the frontpage topic in Discourse.
                            This topic should also contain the navigation.
        """

        self.base_url = base_url.rstrip("/")
        self.frontpage_id = frontpage_id
        self.session = session_class(expire_after=300)

    def get_topic(self, path):
        """
        Retrieve topic object by path
        """

        response = self.session.get(f"{self.base_url}/t/{path}.json",
                                    allow_redirects=False)
        response.raise_for_status()

        if response.status_code >= 300:
            raise RedirectFoundError(response=response)

        return response.json()

    def parse_topic(self, topic):
        return {
            "title": topic["title"],
            "body_html": topic["post_stream"]["posts"][0]["cooked"],
            "updated": dateutil.parser.parse(
                topic["post_stream"]["posts"][0]["updated_at"]
            ),
            "forum_link": f"{self.base_url}/t/{topic['slug']}/{topic['id']}",
            "path": f"/t/{topic['slug']}/{topic['id']}",
        }

    def get_frontpage(self):
        # Get topic data
        topic = self.get_topic(self.frontpage_id)
        frontpage = self.parse_topic(topic)

        # Split HTML into nav and body
        frontpage_html = frontpage["body_html"]
        frontpage_soup = BeautifulSoup(frontpage_html, features="html.parser")
        frontpage_splitpoint = frontpage_soup.find(re.compile("^h[1-6]$"),
                                                   text="Content")
        content_elements = frontpage_splitpoint.fetchPreviousSiblings()
        nav_elements = frontpage_splitpoint.fetchNextSiblings()

        # Update frontpage
        frontpage["body_html"] = "\n".join(map(str,
                                               reversed(content_elements)))
        nav_html = "\n".join(map(str, nav_elements))

        return frontpage, nav_html

    def get_document(self, path):
        """
        Retrieve and return relevant data about a document:
        - Title
        - HTML content
        - Navigation content
        """

        document, nav_html = self.get_frontpage()

        if f"/t/{path}" != document["path"]:
            topic = self.get_topic(path)
            document = self.parse_topic(topic)

        return document, nav_html
Example #12

import os

from canonicalwebteam.http import CachedSession

API_URL = os.getenv("BLOG_API",
                    "https://admin.insights.ubuntu.com/wp-json/wp/v2")

api_session = CachedSession(fallback_cache_duration=3600)

tags = {}


def process_response(response):
    response.raise_for_status()

    return response.json()


def build_get_article_url(
    tags=[],
    per_page=12,
    page=1,
    tags_exclude=[],
    exclude=[],
    categories=[],
    sticky="",
    groups=[],
    after="",
    before="",
    author="",
):
Example #13
# Packages
from canonicalwebteam.http import CachedSession

# Constants
SEARCH_SESSION = CachedSession(fallback_cache_duration=600)


class NoAPIKeyError(Exception):
    pass


def get_search_results(
    api_key,
    api_url,
    search_custom_id,
    query,
    start,
    num,
    session=SEARCH_SESSION,
):
    """
    Query the Google Custom Search API for search results
    """
    if not api_key:
        raise NoAPIKeyError("Unable to search: No API key provided")

    results = session.get(
        api_url,
        params={
            "key": api_key,
            "cx": search_custom_id,
Example #14
# Packages
from canonicalwebteam.http import CachedSession

# Constants
SEARCH_SESSION = CachedSession(expire_after=600)


class NoAPIKeyError(Exception):
    pass


def get_search_results(
    api_key,
    api_url,
    search_custom_id,
    query,
    start,
    num,
    session=SEARCH_SESSION,
):
    """
    Query the Google Custom Search API for search results
    """
    if not api_key:
        raise NoAPIKeyError("Unable to search: No API key provided")

    results = session.get(
        api_url,
        params={
            "key": api_key,
            "cx": search_custom_id,
Example #15
from django.views.generic.base import TemplateView
from django_template_finder_view import TemplateFinder
from django.conf import settings
from django.shortcuts import render
from feedparser import parse
from canonicalwebteam.http import CachedSession
import traceback

try:
    from urllib.parse import urlencode
except ImportError:
    from urllib import urlencode

# Search service
if settings.SEARCH_API_KEY:
    search_session = CachedSession(
        expire_after=settings.SEARCH_CACHE_EXPIRY_SECONDS)


def _get_search_results(query, start, num):
    """
    Query the Google Custom Search API for search results
    """

    if not settings.SEARCH_API_KEY:
        raise Exception('Unable to search: No API key provided')

    results = search_session.get(settings.SEARCH_API_URL,
                                 params={
                                     'key': settings.SEARCH_API_KEY,
                                     'cx': settings.CUSTOM_SEARCH_ID,
                                     'q': query,
Example #16
import base64
import json
import os
import re
import requests

from canonicalwebteam.http import CachedSession
from html import unescape

api_session = CachedSession(fallback_cache_duration=300,
                            file_cache_directory=".webcache")

base_url = "https://boards-api.greenhouse.io/v1/boards/Canonical/jobs"


def get_vacancies(department):
    feed = api_session.get(f"{base_url}?content=true").json()
    path_department = remove_hyphens(department)
    vacancies = []
    for job in feed["jobs"]:
        feed_department = remove_hyphens(job["metadata"][2]["value"])
        if path_department.lower() == "all":
            vacancies.append({
                "title": job["title"],
                "content": unescape(job["content"]),
                "url": job["absolute_url"],
                "location": job["location"]["name"],
                "id": job["id"],
                "employment": job["metadata"][0]["value"],
                "date": job["metadata"][1]["value"],
                "department": job["metadata"][2]["value"],
Example #17
from copy import copy
from django.views.generic.base import TemplateView
from django.conf import settings
from django.shortcuts import render
from feedparser import parse
from canonicalwebteam.http import CachedSession
import traceback

try:
    from urllib.parse import urlencode
except ImportError:
    from urllib import urlencode

# Search service
if settings.SEARCH_API_KEY:
    search_session = CachedSession(
        fallback_cache_duration=settings.SEARCH_CACHE_EXPIRY_SECONDS)


def _get_search_results(query, start, num, site=None):
    """
    Query the Google Custom Search API for search results
    """

    if not settings.SEARCH_API_KEY:
        raise Exception("Unable to search: No API key provided")

    results = search_session.get(
        settings.SEARCH_API_URL,
        params={
            "key": settings.SEARCH_API_KEY,
            "cx": settings.CUSTOM_SEARCH_ID,
Example #18
import logging
from canonicalwebteam.http import CachedSession

# this part is temporarily included until
# https://github.com/canonical-webteam/get-feeds
# is updated for flask applications
requests_timeout = 10
expiry_seconds = 300

cached_request = CachedSession(fallback_cache_duration=expiry_seconds)
logger = logging.getLogger(__name__)


def get(url):
    try:
        response = cached_request.get(url, timeout=requests_timeout)
        response.raise_for_status()
    except Exception as request_error:
        logger.warning("Attempt to get feed failed: {}".format(
            str(request_error)))
        return ""

    return response
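A hypothetical caller, treating the empty-string fallback as "no feed available" (the URL is a placeholder):

response = get("https://example.com/blog/feed")  # placeholder URL

if response:
    print(response.text[:200])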
Example #19
# Core
import re

# Third-party
import dateutil.parser
import humanize
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError
from urllib.parse import urlparse
from canonicalwebteam.http import CachedSession
from jinja2 import Template

# Constants
DEFAULT_SESSION = CachedSession(expire_after=300, old_data_on_error=True)


class RedirectFoundError(HTTPError):
    """
    If we encounter redirects from Discourse, we need to take action
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        url_parts = urlparse(self.response.headers["Location"])
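        # e.g. a Location path of "/t/my-topic/123.json" becomes
        # a redirect path of "/my-topic/123"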
        self.redirect_path = re.sub("/t(/.*).json", r"\1", url_parts.path)


class NavigationParseError(Exception):
    """
    Indicates a failure to extract the navigation from
    the frontpage content
    """
Example #20
class DiscourseDocs:
    """
    A basic model class for retrieving Documentation content
    from a Discourse installation through the API
    """
    def __init__(self, base_url, frontpage_id, session_class=CachedSession):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        @param frontpage_id: The ID of the frontpage topic in Discourse.
                            This topic should also contain the navigation.
        """

        self.base_url = base_url.rstrip("/")
        self.frontpage_id = frontpage_id
        self.session = session_class(expire_after=300)

    def get_topic(self, path):
        """
        Retrieve topic object by path
        """

        response = self.session.get(f"{self.base_url}/t/{path}.json",
                                    allow_redirects=False)
        response.raise_for_status()

        if response.status_code >= 300:
            raise RedirectFoundError(response=response)

        return response.json()

    def parse_topic(self, topic):
        return {
            "title": topic["title"],
            "body_html": topic["post_stream"]["posts"][0]["cooked"],
            "updated": dateutil.parser.parse(
                topic["post_stream"]["posts"][0]["updated_at"]
            ),
            "forum_link": f"{self.base_url}/t/{topic['slug']}/{topic['id']}",
            "path": f"/t/{topic['slug']}/{topic['id']}",
        }

    def get_frontpage(self):
        # Get topic data
        topic = self.get_topic(self.frontpage_id)
        frontpage = self.parse_topic(topic)

        # Split HTML into nav and body
        soup = BeautifulSoup(frontpage["body_html"], features="html.parser")
        splitpoint = soup.find(re.compile("^h[1-6]$"), text="Content")

        if splitpoint:
            body_elements = splitpoint.fetchPreviousSiblings()
            frontpage["body_html"] = "\n".join(
                map(str, reversed(body_elements)))

            nav_elements = splitpoint.fetchNextSiblings()
            nav_html = "\n".join(map(str, nav_elements))
        else:
            nav_html = (
                "<p><em>"
                "Error: Failed to parse navigation from"
                f' <a href="{frontpage["forum_link"]}">'
                "the frontpage topic</a>."
                " Please check the format."
                "</em></p>"
            )

        return frontpage, nav_html

    def process_html(self, html):
        """
        Post-process the HTML output from Discourse to
        remove 'NOTE TO EDITORS' sections
        """

        soup = BeautifulSoup(html, features="html.parser")
        notes_to_editors_spans = soup.find_all(text="NOTE TO EDITORS")

        for span in notes_to_editors_spans:
            container = span.parent.parent.parent.parent

            if (container.name == 'aside'
                    and 'quote' in container.attrs['class']):
                container.decompose()

        return soup.prettify()

    def get_document(self, path):
        """
        Retrieve and return relevant data about a document:
        - Title
        - HTML content
        - Navigation content
        """

        document, nav_html = self.get_frontpage()

        if f"/t/{path}" != document["path"]:
            topic = self.get_topic(path)
            document = self.parse_topic(topic)

        document["body_html"] = self.process_html(document["body_html"])

        return document, nav_html
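Tying it together, a hypothetical usage of the class above; the URL, topic ID, and path are placeholders:

docs = DiscourseDocs(
    base_url="https://discourse.example.com",  # placeholder URL
    frontpage_id="1234",  # placeholder topic ID
)

document, nav_html = docs.get_document("getting-started/42")  # placeholder
print(document["title"])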