Example #1
# Permanent cache (forever)
import datetime

import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
from cachecontrol.heuristics import ExpiresAfter

sess = requests.session()
forever_cache_storage = FileCache('.web_cache', forever=True)
heuristic = ExpiresAfter(weeks=52)
cached_sess = CacheControl(sess,
                           cache=forever_cache_storage,
                           heuristic=heuristic)

for n in range(10):
    start_time = datetime.datetime.now()
    response = cached_sess.get("https://ya.ru")
    delta_time = datetime.datetime.now() - start_time
    print("Time delta: ", delta_time)
    assert response.status_code == 200
Example #2
File: client.py Project: yola/pyutu
import os
import sys
import logging
import requests
import json
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache

cache_dir = '/tmp'

logging.basicConfig(stream=sys.stdout,
                    format="%(asctime)s: " + logging.BASIC_FORMAT,
                    datefmt="%Y-%m-%dT%H:%M:%S%z")
logger = logging.getLogger(__name__)

req = CacheControl(requests.Session(),
                   cache=FileCache(os.path.join(cache_dir, 'pyutu.cache')))

regions = {
    'ap-northeast-1': "Asia Pacific (Tokyo)",
    'ap-southeast-1': "Asia Pacific (Singapore)",
    'ap-southeast-2': "Asia Pacific (Sydney)",
    'eu-central-1': "EU (Frankfurt)",
    'eu-west-1': "EU (Ireland)",
    'sa-east-1': "South America (Sao Paulo)",
    'us-east-1': "US East (N. Virginia)",
    'us-west-1': "US West (N. California)",
    'us-west-2': "US West (Oregon)"
}

svcs = {
    "ec2": {
Example #3
# pointers to remote datasets
organisation_csv = os.environ.get(
    "organisation_csv",
    "https://raw.githubusercontent.com/digital-land/organisation-dataset/master/collection/organisation.csv",
)

region_csv = "https://raw.githubusercontent.com/digital-land/region-collection/master/data/region.csv"


def name_to_identifier(n):
    return n.lower().replace(" ", "-").replace(",", "")


# cache files collected
session = CacheControl(requests.session(), cache=FileCache(".cache"))


def get(url):
    r = session.get(url)
    r.raise_for_status()
    return r.text


def get_csv_as_json(path_to_csv, cache=False):
    if cache:
        csv_str = get(path_to_csv)
        csv_pd = pd.read_csv(StringIO(csv_str), sep=",")
    else:
        csv_pd = pd.read_csv(path_to_csv, sep=",")
    return json.loads(csv_pd.to_json(orient="records"))
Example #4
#!/usr/bin/env python3
import os
import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
import tempfile

PROJECTS = [356]
USERS = ['jptolosa87']

TASK_MANAGER = "https://tasks.kaart.com"

CACHE_DIRECTORY = os.path.join(tempfile.gettempdir(), 'task_modified')
cached_session = CacheControl(requests.session(),
                              cache=FileCache(CACHE_DIRECTORY))


def getModifiedTasksInProject(project):
    api = "api/v1/project/{pid}".format(pid=project)
    with cached_session.get("/".join([TASK_MANAGER, api])) as response:
        response.raise_for_status()
        json_data = response.json()
    tasks = json_data['tasks']['features']

    return findModifiedTasks(project, tasks)


def findModifiedTasks(project, tasks):
    api = "api/v1/project/{project}/task/{task}"
    modified_tasks = []
    for task in tasks:
Example #5
from requests.sessions import Session
from cachecontrol import CacheControl
import google.auth.transport.requests
from google.oauth2 import id_token

VALID_ISSUERS = ['accounts.google.com', 'https://accounts.google.com']
CACHED_SESSION = CacheControl(Session())

# XXX Don't hardcode
USERINFO_ENDPOINT = "https://openidconnect.googleapis.com/v1/userinfo"


def get_idinfo_from_access_token(access_token: str) -> dict:
    """Fetches user information using the access token provided.

    Raises ValueError if an error occurs, including the user not being
    authorized.
    """
    session = Session()
    r = session.get(USERINFO_ENDPOINT,
                    headers={'Authorization': 'Bearer ' + access_token})
    if r.status_code != 200:
        raise ValueError("Unexpected response code %d" % r.status_code)
    return r.json()


def validate_id_token(idt: str, client_id: str) -> dict:
    """Validate the id_token passed using Google's validation code.

    idt is an id_token, which can be extracted from an OpenID Connect
    authorization response as the 'id_token' field.
Example #6
def sess():
    sess = CacheControl(requests.Session())
    yield sess

    # closing session object
    sess.close()
Example #7
        ''' Class to force storing pages in CacheControl.

        From CacheControl examples.
        '''
        def update_headers(self, response):
            date = parsedate(response.headers['date'])
            expires = datetime(*date[:6]) + timedelta(weeks=1)
            return {
                'expires': formatdate(calendar.timegm(expires.timetuple())),
                'cache-control': 'public',
            }

    Scrapper._requests = CacheControl(Scrapper._requests,
                                      cache=FileCache(
                                          tempfile.gettempdir() +
                                          '/cagematch-cache',
                                          forever=True,
                                      ),
                                      heuristic=OneWeekHeuristic())

except Exception as e:
    logging.warning('CacheControl not available: %s', e)


class WikiData(Scrapper):

    API_URL = 'https://www.wikidata.org/w/api.php'
    COMMONS_URL = 'https://commons.wikimedia.org/wiki/File:'

    WRESTLER_ID = 13474373
    PROMOTION_ID = 131359
Example #8
def getURL(url,
           post_data=None,
           params=None,
           headers=None,
           timeout=30,
           session=None,
           json=False):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session,
                           cache=caches.FileCache(
                               os.path.join(cache_dir, 'sessions')))

    # request session headers
    req_headers = {'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    session.headers.update(req_headers)

    # request session ssl verify
    session.verify = False

    # request session paramaters
    session.params = params

    try:
        # Remove double-slashes from url
        parsed = list(urlparse.urlparse(url))
        parsed[2] = re.sub("/{2,}", "/",
                           parsed[2])  # replace two or more / with one
        url = urlparse.urlunparse(parsed)

        # request session proxies
        if sickbeard.PROXY_SETTING:
            logger.log("Using proxy for url: " + url, logger.DEBUG)
            session.proxies = {
                "http": sickbeard.PROXY_SETTING,
                "https": sickbeard.PROXY_SETTING,
            }

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout)
        else:
            resp = session.get(url, timeout=timeout)

        if not resp.ok:
            logger.log(
                u"Requested url " + url + " returned status code is " +
                str(resp.status_code) + ': ' +
                clients.http_error_code[resp.status_code], logger.DEBUG)
            return

    except requests.exceptions.HTTPError, e:
        logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url,
                   logger.WARNING)
        return
Example #9
 def test_file_cache_recognizes_consumed_file_handle(self):
     s = CacheControl(Session(), FileCache('web_cache'))
     s.get('http://httpbin.org/cache/60')
     r = s.get('http://httpbin.org/cache/60')
     assert r.from_cache
Example #10
def latest(repo,
           output_format='version',
           pre=False,
           newer_than=False,
           assets_filter=False,
           shortUrls=False):

    # data that we may collect further
    # the main thing, we're after - parsed version number, e.g. 1.2.3 (no extras chars)
    version = None
    # corresponding tag name, e.g. v1.2.3 or v1.2.3-stable (extra chars OK,
    # used for constructing non-API tar download URLs)
    tag = None
    description = None
    # set this when an API returns json
    data = None
    license = None
    # date of selected release, used in checks
    # github API returns tags NOT in chronological order
    # so if author switched from v20150121 (old) to v2.0.1 format, the old value is "higher"
    # so we have to check if a tag is actually newer, this is very slow but we have to accept :)
    tagDate = None

    headers = {}
    cache_dir = user_cache_dir("lastversion")
    log.info("Using cache directory: {}.".format(cache_dir))
    # Some special non-Github cases for our repository are handled by checking URL

    # 1. nginx version is taken as version of stable (written by rpm check script)
    # to /usr/local/share/builder/nginx-stable.ver
    if repo.startswith(('http://nginx.org/', 'https://nginx.org/')):
        with open('/usr/local/share/builder/nginx-stable.ver', 'r') as file:
            return file.read().replace('\n', '')

    # 2. monit version can be obtained from Bitbucket downloads section of the project
    elif repo.startswith('https://mmonit.com/'):
        with CacheControl(requests.Session(), cache=FileCache(cache_dir)) as s:
            # Special case Monit repo
            response = s.get(
                "https://api.bitbucket.org/2.0/repositories/{}/downloads".
                format("tildeslash/monit"),
                headers=headers)
            data = response.json()
            s.close()
            return sanitize_version(data['values'][0]['name'])

    # 3. Everything else is GitHub passed as owner/repo
    else:
        # But if full link specified, strip it to owner/repo
        apiBase = 'https://api.github.com'
        githubHostname = 'github.com'
        if repo.startswith(('https://', 'http://')):
            urlParts = repo.split('/')
            githubHostname = urlParts[2]
            repo = urlParts[3] + "/" + urlParts[4]
            if 'github.com' != githubHostname:
                apiBase = "https://{}/api/v3".format(githubHostname)

        # Explicitly specify API version we want:
        # headers['Accept'] = "application/vnd.github.v3+json"

        api_token = os.getenv("GITHUB_API_TOKEN")
        if api_token:
            headers['Authorization'] = "token {}".format(api_token)

        with CacheControl(requests.Session(), cache=FileCache(cache_dir)) as s:

            s.headers.update(headers)

            # search it :)
            if '/' not in repo:
                r = s.get('{}/search/repositories?q={}+in:name'.format(
                    apiBase, repo),
                          headers=headers)
                repo = r.json()['items'][0]['full_name']

            # releases/latest fetches only non-prerelease, non-draft, so it
            # should not be used for hunting down pre-releases assets
            if not pre:
                # https://stackoverflow.com/questions/28060116/which-is-more-reliable-for-github-api-conditional-requests-etag-or-last-modifie/57309763?noredirect=1#comment101114702_57309763
                # ideally we disable ETag validation for this endpoint completely
                r = s.get('{}/repos/{}/releases/latest'.format(apiBase, repo),
                          headers=headers)
                if r.status_code == 200:
                    the_tag = r.json()['tag_name']
                    version = sanitize_version(the_tag, pre)
                    if version:
                        log.info(
                            "Set version as current selection: {}.".format(
                                version))
                        tag = the_tag
                        data = r.json()
                        tagDate = dateutil.parser.parse(
                            r.json()['published_at'])
            else:
                r = s.get('{}/repos/{}/releases'.format(apiBase, repo),
                          headers=headers)
                if r.status_code == 200:
                    for release in r.json():
                        the_tag = release['tag_name']
                        the_version = sanitize_version(the_tag, pre)
                        if the_version and ((not version) or
                                            (the_version > version)):
                            version = the_version
                            log.info(
                                "Set version as current selection: {}.".format(
                                    version))
                            tag = the_tag
                            data = release
                            tagDate = dateutil.parser.parse(
                                data['published_at'])

            # formal release may not exist at all, or be "late/old" in case
            # actual release is only a simple tag so let's try /tags

            r = s.get('{}/repos/{}/tags'.format(apiBase, repo),
                      headers=headers)
            if r.status_code == 200:
                for t in r.json():
                    the_tag = t['name']
                    the_version = sanitize_version(the_tag, pre)

                    r_commit = s.get('{}/repos/{}/git/commits/{}'.format(
                        apiBase, repo, t['commit']['sha']),
                                     headers=headers)
                    the_date = r_commit.json()['committer']['date']
                    the_date = dateutil.parser.parse(the_date)

                    if (the_version and ((not version) or (the_version > version))) \
                            or (not tagDate or the_date > tagDate):
                        # rare case: if upstream filed formal pre-release that passes as stable
                        # version (tag is 1.2.3 instead of 1.2.3b) double check if pre-release
                        # TODO handle API failure here as it may result in "false positive"?
                        if not pre:
                            r = s.get('{}/repos/{}/releases/tags/{}'.format(
                                apiBase, repo, the_tag),
                                      headers=headers)
                            if r.status_code == 200:
                                if r.json()['prerelease']:
                                    log.info(
                                        "Found formal release for this tag which is unwanted "
                                        "pre-release: {}.".format(version))
                                    continue
                        version = the_version
                        log.info(
                            "Setting version as current selection: {}.".format(
                                version))
                        tag = the_tag
                        tagDate = the_date
                        data = t
            else:
                sys.stderr.write(r.text)
                return None

            if output_format == 'json':
                r = s.get('{}/repos/{}/license'.format(apiBase, repo),
                          headers=headers)
                if r.status_code == 200:
                    license = r.json()
        s.close()

        # bail out, found nothing that looks like a release
        if not version:
            return False

        # special exit code "2" is useful for scripting to detect if no newer release exists
        if newer_than and not (version > newer_than):
            sys.exit(2)

        # return the release if we've reached far enough:
        if output_format == 'version':
            return str(version)
        elif output_format == 'json':
            if not data:
                data = {}
            if description:
                description = description.strip()
            data['version'] = str(version)
            data['description'] = description
            data['v_prefix'] = tag.startswith("v")
            data['spec_tag'] = tag.replace(str(version), "%{upstream_version}")
            data['tag_name'] = tag
            data['license'] = license
            return json.dumps(data)
        elif output_format == 'assets':
            urls = []
            if 'assets' in data and len(data['assets']) > 0:
                for asset in data['assets']:
                    if assets_filter:
                        if not re.search(assets_filter, asset['name']):
                            continue
                    else:
                        if os.name == 'nt' and asset['name'].endswith(
                                posixAssetMarkers + darwinAssetMarkers):
                            continue
                        # zips are OK for Linux, so we do some heuristics to weed out Windows stuff
                        if os.name == 'posix' and asset['name'].endswith(
                                darwinAssetMarkers + windowsAssetMarkers):
                            continue
                    urls.append(asset['browser_download_url'])
            else:
                download_url = github_tag_download_url(githubHostname, repo,
                                                       tag, shortUrls)
                if not assets_filter or re.search(assets_filter, download_url):
                    urls.append(download_url)
            if not len(urls):
                sys.exit(3)
            else:
                return "\n".join(urls)
        elif output_format == 'source':
            return github_tag_download_url(githubHostname, repo, tag,
                                           shortUrls)
Example #11
def latest(repo,
           output_format='version',
           pre_ok=False,
           assets_filter=None,
           short_urls=False,
           major=None,
           only=None,
           at=None,
           having_asset=None):
    """Find latest release version for a project.

    Args:
        major (str): Only consider versions which are "descendants" of this major version string
        short_urls (bool): Whether we should try to return shorter URLs for release data
        assets_filter (str): Regular expression for filtering assets for the latest release
        only (str): Only consider tags with this text. Useful for repos with multiple projects
        repo (str): Repository specifier in any form.
        output_format (str): Affects return format. Possible values `version`, `json`, `dict`,
                             `assets`, `source`, `tag`.
        pre_ok (bool): Specifies whether pre-releases can be accepted as newer version.
        at (str): Specifies repo hosting more precisely, only useful if repo argument was
                  specified as one word.
        having_asset (Union[str, bool]): Only consider releases with the given asset.
                                         Pass `True` for any asset

    Returns:
        Version: Newer version object, if found and `output_format` is `version`.
    Returns:
        str: Single string containing tag, if found and `output_format` is `tag`

    """
    cache_dir = user_cache_dir("lastversion")
    log.info("Using cache directory: {}.".format(cache_dir))
    repo_data = {}

    if repo.endswith('.yml') and not repo.startswith(('http://', 'https://')):
        with open(repo) as fpi:
            repo_data = yaml.safe_load(fpi)
            if 'repo' in repo_data:
                if 'nginx-extras' in repo:
                    repo_data['module_of'] = 'nginx'
                name = os.path.splitext(os.path.basename(repo))[0]
                if 'module_of' in repo_data:
                    name = '{}-module-{}'.format(repo_data['module_of'], name)
                repo = repo_data['repo']
                repo_data['name'] = name

    if repo.startswith(
        ('http://', 'https://')) and repo.endswith('Chart.yaml'):
        at = 'helm_chart'

    if repo.endswith('.spec'):
        # repo is specified inside the .spec file
        # github repo is resolved via %{upstream_github} + %{name}/%{upstream_name}
        # no upstream_github global means that the spec was not prepared for lastversion
        # optional: use of spec_tag macros if the source is from GitHub. in edge cases we check
        # new version via GitHub, but prepared sources are elsewhere
        with open(repo) as f:
            name = None
            upstream_github = None
            upstream_name = None
            current_version = None
            spec_repo = None
            spec_url = None
            for l in f.readlines():
                if l.startswith('%global lastversion_repo'):
                    spec_repo = l.split(' ')[2].strip()
                elif l.startswith('%global upstream_github'):
                    upstream_github = l.split(' ')[2].strip()
                elif l.startswith('%global upstream_name'):
                    upstream_name = l.split(' ')[2].strip()
                elif l.startswith('Name:'):
                    name = l.split('Name:')[1].strip()
                elif l.startswith('URL:'):
                    spec_url = l.split('URL:')[1].strip()
                elif l.startswith('%global upstream_version '):
                    current_version = l.split(' ')[2].strip()
                    # influences %spec_tag to use %upstream_version instead of %version
                    repo_data['module_of'] = True
                elif l.startswith('Version:') and not current_version:
                    current_version = l.split('Version:')[1].strip()
            if spec_url:
                spec_host = urlparse(spec_url).hostname
                if spec_host in ['github.com'
                                 ] and not upstream_github and not spec_repo:
                    log.warning(
                        'Neither %upstream_github nor %lastversion_repo macros were found. '
                        'Please prepare your spec file using instructions: '
                        'https://lastversion.getpagespeed.com/spec-preparing.html'
                    )
            if not current_version:
                log.critical(
                    'Did not find neither Version: nor %upstream_version in the spec file'
                )
                sys.exit(1)
            try:
                if current_version != 'x':
                    repo_data['current_version'] = Version(current_version)
            except InvalidVersion:
                log.critical(
                    'Failed to parse current version in {}. Tried {}'.format(
                        repo, current_version))
                sys.exit(1)
            if upstream_name:
                repo_data['name'] = upstream_name
                repo_data['spec_name'] = '%{upstream_name}'
            else:
                repo_data['name'] = name
                repo_data['spec_name'] = '%{name}'
            if upstream_github:
                repo = "{}/{}".format(upstream_github, repo_data['name'])
                log.info(
                    'Discovered GitHub repo {} from .spec file'.format(repo))
            elif spec_repo:
                repo = spec_repo
                log.info(
                    'Discovered explicit repo {} from .spec file'.format(repo))
            elif spec_url:
                repo = spec_url

    if (not at or '/' in repo) and at != 'helm_chart':
        # find the right hosting for this repo
        project_holder = HolderFactory.get_instance_for_repo(repo, only=only)
    else:
        project_holder = HolderFactory.HOLDERS[at](repo, hostname=None)

    project_holder.set_only(only)
    project_holder.set_having_asset(having_asset)

    # we are completely "offline" for 1 hour, not even making conditional requests
    # heuristic=ExpiresAfter(hours=1)   <- make configurable
    with CacheControl(project_holder, cache=FileCache(cache_dir)) as s:
        release = s.get_latest(pre_ok=pre_ok, major=major)
    s.close()

    # bail out, found nothing that looks like a release
    if not release:
        return None

    from_type = 'Located the latest release tag {} at: {}'.format(
        release['tag_name'], project_holder.get_canonical_link())
    if 'type' in release:
        from_type = '{} via {} mechanism'.format(from_type, release['type'])
    log.info(from_type)

    version = release['version']
    tag = release['tag_name']

    # return the release if we've reached far enough:
    if output_format == 'version':
        return version

    if output_format in ['json', 'dict']:
        if output_format == 'dict':
            release['version'] = version
        else:
            release['version'] = str(version)
            if 'tag_date' in release:
                release['tag_date'] = str(release['tag_date'])
        release['v_prefix'] = tag.startswith("v")
        version_macro = 'upstream_version' if 'module_of' in repo_data else 'version'
        version_macro = '%{{{}}}'.format(version_macro)
        holder_i = {value: key for key, value in HolderFactory.HOLDERS.items()}
        release['source'] = holder_i[type(project_holder)]
        release['spec_tag'] = tag.replace(str(version), version_macro)
        # spec_tag_no_prefix is the helpful macro which will allow us to know where tarball
        # extracts to (GitHub-specific)
        if release['spec_tag'].startswith('v{}'.format(version_macro)) or \
                re.match(r'^v\d', release['spec_tag']):
            release['spec_tag_no_prefix'] = release['spec_tag'].lstrip('v')
        else:
            release['spec_tag_no_prefix'] = release['spec_tag']
        release['tag_name'] = tag
        if hasattr(s, 'repo_license'):
            release['license'] = s.repo_license(tag)
        if hasattr(s, 'repo_readme'):
            release['readme'] = s.repo_readme(tag)
        release.update(repo_data)
        try:
            release['assets'] = s.get_assets(release, short_urls,
                                             assets_filter)
        except NotImplementedError:
            pass
        release['from'] = project_holder.get_canonical_link()
        return release

    if output_format == 'assets':
        return s.get_assets(release, short_urls, assets_filter)

    if output_format == 'source':
        return s.release_download_url(release, short_urls)

    if output_format == 'tag':
        return tag

    return None
Example #12
# # WG Notifications of deaths of residents related to COVID-19 in adult care homes

from gssutils import *
import json
import numpy as np

if is_interactive():
    from requests import Session
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import ExpiresAfter
    scrape = Scraper(seed="info.json",
                     session=CacheControl(Session(),
                                          cache=FileCache('.cache'),
                                          heuristic=ExpiresAfter(days=1)))
    dist = scrape.distribution(
        latest=True,
        title=lambda x: x.startswith(
            'Notifications of deaths of residents related to COVID-19'))
    tabs = {tab.name: tab for tab in dist.as_databaker()}
list(tabs)


# +
def left(s, amount):
    return s[:amount]


def right(s, amount):
    return s[-amount:]
Example #13
                'size': '1',
                'zone': 'one'
            }
            create_fields['csrf_token'] = csrf_token
            self.session.post(self.url + 'volumes/create', data=create_fields)


if __name__ == "__main__":
    requests.packages.urllib3.disable_warnings()
    url = URL
    num_users = NUM_USERS
    num_iterations = NUM_ITERATIONS
    if len(sys.argv) > 1:
        url = sys.argv[1]
        if len(sys.argv) > 2:
            num_users = int(sys.argv[2])
            if len(sys.argv) > 3:
                num_iterations = int(sys.argv[3])
    else:
        print "usage: reqgenerator.py <console-url> [num sessions] [num iterations/session]"
        sys.exit()
    # start a bunch of users
    for i in range(0, num_users):
        s = requests.Session()
        s = CacheControl(s)
        print "Starting user: " + str(i)
        u = BrowsingUser(url, s, 'user' + str(i), num_iterations)
        u.login('ui-test-acct-00', 'admin', 'mypassword0')
        Thread(target=u).start()
        time.sleep(2)
Example #14
File: location.py Project: pvsr/weather
import configparser
import json

from typing import Mapping

import requests
from cachecontrol import CacheControl  # type: ignore

REQUEST_CACHE = CacheControl(requests.session())

BASE_URL = "https://api.weather.gov"


class Location:
    short_name: str
    long_name: str
    forecast_url: str
    hourly_forecast_url: str
    alert_url: str

    def __init__(self, short_name: str, long_name: str, latitude: float,
                 longitude: float):
        self.short_name = short_name
        self.long_name = long_name

        trunc = lambda f: format(f, '.4f')
        url = f"{BASE_URL}/points/{trunc(latitude)},{trunc(longitude)}"
        data = fetch_json(url, 'point')['properties']

        self.forecast_url = data['forecast']
        self.hourly_forecast_url = data['forecastHourly']
Example #15
import xml.etree.ElementTree as ET

import requests
from cachecontrol import CacheControl


def getNewName(uid):
    api_url = "https://www.openstreetmap.org/api/0.6/user/{}".format(uid)
    session = CacheControl(requests.session())
    result = session.get(api_url).text
    root = ET.fromstring(result)
    return root.find("user").attrib['display_name']
Example #16
def cli(url, repositories, search, rows, minstar, token, output_file_name,
        max_repos_retrieved):

    MODE = os.environ.get("GHTOPDEP_ENV")
    REPOS_PER_FILE_SIZE_LIMIT = 3000

    if (search) and token:
        gh = github3.login(token=token)
        CacheControl(gh.session,
                     cache=FileCache(CACHE_DIR),
                     heuristic=OneDayHeuristic())
    elif (search) and not token:
        click.echo("Please provide token")
        sys.exit()

    destination = "repository"
    destinations = "repositories"
    if not repositories:
        destination = "package"
        destinations = "packages"

    repos = []
    more_than_zero_count = 0
    total_repos_count = 0
    # spinner = Halo(text="Fetching information about {0}".format(destinations), spinner="dots")
    # spinner.start()

    sess = requests.session()
    retries = Retry(total=15, backoff_factor=15, status_forcelist=[429])
    adapter = CacheControlAdapter(max_retries=retries,
                                  cache=FileCache(CACHE_DIR),
                                  heuristic=OneDayHeuristic())
    sess.mount("http://", adapter)
    sess.mount("https://", adapter)

    page_url = get_page_url(sess, url, destination)

    found_repos = 0
    total_found_repos = 0
    number_of_files_processed = 0

    while True:
        time.sleep(1)
        response = sess.get(page_url)

        print(page_url)

        parsed_node = HTMLParser(response.text)
        dependents = parsed_node.css(ITEM_SELECTOR)
        total_repos_count += len(dependents)
        for dep in dependents:
            repo_stars_list = dep.css(STARS_SELECTOR)
            # only for ghost or private? packages
            if repo_stars_list:
                repo_stars = repo_stars_list[0].text().strip()
                repo_stars_num = int(repo_stars.replace(",", ""))
            else:
                continue

            if repo_stars_num != 0:
                more_than_zero_count += 1
            if repo_stars_num >= minstar:
                relative_repo_url = dep.css(
                    REPO_SELECTOR)[0].attributes["href"]
                repo_url = "{0}{1}".format(GITHUB_URL, relative_repo_url)

                # can be listed same package
                is_already_added = already_added(repo_url, repos)
                if not is_already_added and repo_url != url:
                    # print("adding repo ", repo_url)
                    found_repos += 1
                    total_found_repos += 1

                    repos.append({"url": repo_url, "stars": repo_stars_num})

                    if found_repos >= REPOS_PER_FILE_SIZE_LIMIT:
                        sorted_repos = repos
                        repos = []
                        number_of_files_processed += 1
                        found_repos = 0

                        show_result(sorted_repos, total_repos_count,
                                    more_than_zero_count, destinations,
                                    number_of_files_processed,
                                    output_file_name)

                        print("JSON output placed into file!")

                    if total_found_repos > max_repos_retrieved:
                        print(f'Collected {total_found_repos} repos.')
                        sys.exit()

        node = parsed_node.css(NEXT_BUTTON_SELECTOR)
        if len(node) == 2:
            page_url = node[1].attributes["href"]
        elif len(node) == 0 or node[0].text() == "Previous":
            # spinner.stop()
            break
        elif node[0].text() == "Next":
            page_url = node[0].attributes["href"]

    sorted_repos = repos

    if search:
        for repo in repos:
            repo_path = urlparse(repo["url"]).path[1:]
            for s in gh.search_code("{0} repo:{1}".format(search, repo_path)):
                click.echo("{0} with {1} stars".format(s.html_url,
                                                       repo["stars"]))
    elif number_of_files_processed == 0:
        show_result(sorted_repos, total_repos_count, more_than_zero_count,
                    destinations, number_of_files_processed, output_file_name)
Example #17
File: fetch.py Project: nowster/ceefax
 def __init__(self):
     cachefile = f"{config['cachedir']}/requests"
     self._cached_sess = CacheControl(requests.Session(),
                                      cache=FileCache(cachefile))
Example #18
import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache


def download_wheel(url: str, expected_md5: str) -> bytes:
    session = requests.session()
    cached_session = CacheControl(session, cache=FileCache(".web_cache"))

    response = cached_session.get(url)
    return response.content
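
The download_wheel helper above accepts expected_md5 but never checks it. A minimal sketch of the verification step, assuming the standard hashlib module (download_wheel_verified is a hypothetical name, not part of the original snippet):

import hashlib


def download_wheel_verified(url: str, expected_md5: str) -> bytes:
    # hypothetical variant of download_wheel above, with the MD5 check applied
    cached_session = CacheControl(requests.session(), cache=FileCache(".web_cache"))
    content = cached_session.get(url).content
    if hashlib.md5(content).hexdigest() != expected_md5:
        raise ValueError("MD5 mismatch for {}".format(url))
    return content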
Example #19
def cli(url, repositories, search, table, rows, minstar, report, description,
        token):
    MODE = os.environ.get("GHTOPDEP_ENV")
    BASE_URL = 'https://437w61gcj1.execute-api.us-west-2.amazonaws.com/api'
    if MODE == "development":
        BASE_URL = 'http://127.0.0.1:8080'

    if report:
        try:
            result = requests.get('{}/repos?url={}'.format(BASE_URL, url))
            if result.status_code != 404:
                sorted_repos = sort_repos(result.json()['deps'], rows)
                repos = readable_stars(sorted_repos)
                click.echo(tabulate(repos, headers="keys", tablefmt="github"))
                sys.exit()
        except requests.exceptions.ConnectionError as e:
            click.echo(e)

    if (description or search) and token:
        gh = github3.login(token=token)
        CacheControl(gh.session,
                     cache=FileCache(CACHE_DIR),
                     heuristic=OneDayHeuristic())
    elif (description or search) and not token:
        click.echo("Please provide token")
        sys.exit()

    destination = "repository"
    destinations = "repositories"
    if not repositories:
        destination = "package"
        destinations = "packages"
    page_url = "{0}/network/dependents?dependent_type={1}".format(
        url, destination.upper())

    repos = []
    more_than_zero_count = 0
    total_repos_count = 0
    spinner = Halo(text="Fetching information about {0}".format(destinations),
                   spinner="dots")
    spinner.start()

    sess = requests.session()
    retries = Retry(total=15, backoff_factor=15, status_forcelist=[429])
    adapter = CacheControlAdapter(max_retries=retries,
                                  cache=FileCache(CACHE_DIR),
                                  heuristic=OneDayHeuristic())
    sess.mount("http://", adapter)
    sess.mount("https://", adapter)

    while True:
        response = sess.get(page_url)
        parsed_node = HTMLParser(response.text)
        dependents = parsed_node.css(ITEM_SELECTOR)
        total_repos_count += len(dependents)
        for dep in dependents:
            repo_stars_list = dep.css(STARS_SELECTOR)
            # only for ghost or private? packages
            if repo_stars_list:
                repo_stars = repo_stars_list[0].text().strip()
                repo_stars_num = int(repo_stars.replace(",", ""))
            else:
                continue

            if repo_stars_num != 0:
                more_than_zero_count += 1
            if repo_stars_num >= minstar:
                relative_repo_url = dep.css(
                    REPO_SELECTOR)[0].attributes["href"]
                repo_url = "{0}{1}".format(GITHUB_URL, relative_repo_url)

                # can be listed same package
                is_already_added = already_added(repo_url, repos)
                if not is_already_added and repo_url != url:
                    if description:
                        repo_description = fetch_description(
                            gh, relative_repo_url)
                        repos.append({
                            "url": repo_url,
                            "stars": repo_stars_num,
                            "description": repo_description
                        })
                    else:
                        repos.append({
                            "url": repo_url,
                            "stars": repo_stars_num
                        })

        node = parsed_node.css(NEXT_BUTTON_SELECTOR)
        if len(node) == 2:
            page_url = node[1].attributes["href"]
        elif len(node) == 0 or node[0].text() == "Previous":
            spinner.stop()
            break
        elif node[0].text() == "Next":
            page_url = node[0].attributes["href"]

    if report:
        try:
            requests.post('{}/repos'.format(BASE_URL),
                          json={
                              "url": url,
                              "deps": repos
                          })
        except requests.exceptions.ConnectionError as e:
            click.echo(e)

    sorted_repos = sort_repos(repos, rows)

    if search:
        for repo in repos:
            repo_path = urlparse(repo["url"]).path[1:]
            for s in gh.search_code("{0} repo:{1}".format(search, repo_path)):
                click.echo("{0} with {1} stars".format(s.html_url,
                                                       repo["stars"]))
    else:
        show_result(sorted_repos, total_repos_count, more_than_zero_count,
                    destinations, table)
Example #20
import requests
from cachecontrol import CacheControl

session = requests.session()
cached_session = CacheControl(session)  # wrap the session to create cached_session

# The first request has no cache entry, so it is fetched from the server and cached.
response = cached_session.get('https://docs.python.org/3/')
print(response.from_cache)  # False

# The second request uses the ETag and Last-Modified values to check whether the resource has changed.
# If it has not changed, the content is served from the cache, so the request is handled quickly.
response = cached_session.get('https://docs.python.org/3/')
print(response.from_cache)  # True
Example #21
import sys
import requests
from cachecontrol import CacheControl
from datetime import datetime
from itunes import HOST_NAME

__all__ = [
    'TS_FORMAT', 'SESSION', 'ITunesException', 'BaseObject', 'Resource',
    'NoResultsFoundException', 'Artist', 'Album', 'Track', 'Audiobook',
    'Software', 'TVEpisode'
]

#: iTunes API Timestamp format
TS_FORMAT = '%Y-%m-%dT%H:%M:%S'

#: Globally accessible cache-enabled requests session
SESSION = CacheControl(requests.session())


class ITunesException(Exception):
    """Base iTunes request exception"""
    def __init__(self, message):
        self.message = message

    def __str__(self):
        return '{type}: {msg}'.format(type=self.__class__.__name__,
                                      msg=self.message)


class NoResultsFoundException(ITunesException):
    """iTunes error for when no results are returned from a Lookup"""
    def __init__(self):
Example #22
 def sess(self, url, tmpdir):
     self.url = url
     self.cache = FileCache(str(tmpdir))
     sess = CacheControl(requests.Session(), cache=self.cache)
     return sess
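
The fixture above only constructs the cached session; a sketch of a test that might consume it, assuming pytest-style fixtures (the test name and assertion are not from the original suite):

 def test_served_from_cache(self, sess, url):
     # hypothetical test: the second request for the same URL should be
     # answered from the FileCache populated by the first request
     sess.get(url)
     response = sess.get(url)
     assert response.from_cache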
Example #23
def includeme(config):   # pragma: no cover
    dataseturigenerator = UriPatternGenerator('https://id.erfgoed.net/datasets/thesauri/%s')

    TREES = SQLAlchemyProvider(
        {'id': 'TREES', 'conceptscheme_id': 1},
        config.registry.dbmaker
    )

    GEO = SQLAlchemyProvider(
        {'id': 'GEOGRAPHY', 'conceptscheme_id': 2},
        config.registry.dbmaker
    )

    STYLES = SQLAlchemyProvider(
        {
            'id': 'STYLES',
            'conceptscheme_id': 3,
            'dataset': {
                'uri': dataseturigenerator.generate(id='stijlen_en_culturen'),
                'publisher': ['https://id.erfgoed.net/actoren/501'],
                'created': [date(2008,2,14)],
                'language': ['nl-BE'],
                'license': [
                    'https://creativecommons.org/licenses/by/4.0/',
                    'http://data.vlaanderen.be/doc/licentie/modellicentie-gratis-hergebruik/v1.0'
                ]
            }

        },
        config.registry.dbmaker,
        uri_generator=UriPatternGenerator('https://id.erfgoed.net/thesauri/stijlen_en_culturen/%s')
    )

    MATERIALS = SQLAlchemyProvider(
        {
            'id': 'MATERIALS',
            'conceptscheme_id': 4,
            'dataset': {
                'uri': dataseturigenerator.generate(id='materialen'),
                'publisher': ['https://id.erfgoed.net/actoren/501'],
                'created': [date(2011,3,16)],
                'language': ['nl-BE'],
                'license': [
                    'https://creativecommons.org/licenses/by/4.0/',
                    'http://data.vlaanderen.be/doc/licentie/modellicentie-gratis-hergebruik/v1.0'
                ]
            }
        },
        config.registry.dbmaker,
        uri_generator=UriPatternGenerator('https://id.erfgoed.net/thesauri/materialen/%s')
    )

    EVENTTYPES = SQLAlchemyProvider(
        {
            'id': 'EVENTTYPE',
            'conceptscheme_id': 5,
            'dataset': {
                'uri': dataseturigenerator.generate(id='gebeurtenistypes'),
                'publisher': ['https://id.erfgoed.net/actoren/501'],
                'created': [date(2010,8,13)],
                'language': ['nl-BE'],
                'license': [
                    'https://creativecommons.org/licenses/by/4.0/',
                    'http://data.vlaanderen.be/doc/licentie/modellicentie-gratis-hergebruik/v1.0'
                ]
            }
        },
        config.registry.dbmaker,
        uri_generator=UriPatternGenerator('https://id.erfgoed.net/thesauri/gebeurtenistypes/%s')
    )

    HERITAGETYPES = SQLAlchemyProvider(
        {
            'id': 'HERITAGETYPE',
            'conceptscheme_id': 6,
            'dataset': {
                'uri': dataseturigenerator.generate(id='erfgoedtypes'),
                'publisher': ['https://id.erfgoed.net/actoren/501'],
                'created': [date(2008,2,14)],
                'language': ['nl-BE'],
                'license': [
                    'https://creativecommons.org/licenses/by/4.0/',
                    'http://data.vlaanderen.be/doc/licentie/modellicentie-gratis-hergebruik/v1.0'
                ]
            }
        },
        config.registry.dbmaker,
        uri_generator=UriPatternGenerator('https://id.erfgoed.net/thesauri/erfgoedtypes/%s')
    )

    PERIODS = SQLAlchemyProvider(
        {
            'id': 'PERIOD',
            'conceptscheme_id': 7,
            'dataset': {
                'uri': dataseturigenerator.generate(id='dateringen'),
                'publisher': ['https://id.erfgoed.net/actoren/501'],
                'created': [date(2008,2,14)],
                'language': ['nl-BE'],
                'license': [
                    'https://creativecommons.org/licenses/by/4.0/',
                    'http://data.vlaanderen.be/doc/licentie/modellicentie-gratis-hergebruik/v1.0'
                ]
            }
        },
        config.registry.dbmaker,
        uri_generator=UriPatternGenerator('https://id.erfgoed.net/thesauri/dateringen/%s')
    )

    SPECIES = SQLAlchemyProvider(
        {
            'id': 'SPECIES',
            'conceptscheme_id': 8,
            'dataset': {
                'uri': dataseturigenerator.generate(id='soorten'),
                'publisher': ['https://id.erfgoed.net/actoren/501'],
                'created': [date(2011,5,23)],
                'language': ['nl-BE', 'la'],
                'license': [
                    'https://creativecommons.org/licenses/by/4.0/',
                    'http://data.vlaanderen.be/doc/licentie/modellicentie-gratis-hergebruik/v1.0'
                ]
            }
        },
        config.registry.dbmaker,
        uri_generator=UriPatternGenerator('https://id.erfgoed.net/thesauri/soorten/%s')
    )

    # use 'subject': ['external'] for read only external providers (only available in REST service)

    getty_session = CacheControl(requests.Session(), heuristic=ExpiresAfter(weeks=1))

    AAT = AATProvider(
        {'id': 'AAT', 'subject': ['external']},
        session=getty_session
    )

    TGN = TGNProvider(
        {'id': 'TGN', 'subject': ['external']},
        session=getty_session
    )

    eh_session = CacheControl(requests.Session(), heuristic=ExpiresAfter(weeks=1))

    EH_PERIOD = HeritagedataProvider(
        {'id': 'EH_PERIOD', 'subject': ['external']},
        scheme_uri='http://purl.org/heritagedata/schemes/eh_period',
        session=eh_session
    )

    EH_MONUMENT_TYPE = HeritagedataProvider(
        {'id': 'EH_MONUMENT_TYPE', 'subject': ['external']},
        scheme_uri='http://purl.org/heritagedata/schemes/eh_tmt2',
        session=eh_session
    )

    EH_MATERIALS = HeritagedataProvider(
        {'id': 'EH_MATERIALS', 'subject': ['external']},
        scheme_uri='http://purl.org/heritagedata/schemes/eh_tbm',
        session=eh_session
    )

    skosregis = config.get_skos_registry()
    skosregis.register_provider(TREES)
    skosregis.register_provider(GEO)
    skosregis.register_provider(STYLES)
    skosregis.register_provider(MATERIALS)
    skosregis.register_provider(EVENTTYPES)
    skosregis.register_provider(HERITAGETYPES)
    skosregis.register_provider(PERIODS)
    skosregis.register_provider(SPECIES)
    skosregis.register_provider(AAT)
    skosregis.register_provider(TGN)
    skosregis.register_provider(EH_PERIOD)
    skosregis.register_provider(EH_MONUMENT_TYPE)
    skosregis.register_provider(EH_MATERIALS)
Example #24
 def setUp(self):
     self.url = "https://httpbin.org/cache/60"
     self.sess = CacheControl(requests.Session(),
                              cache=SQLiteCache(":memory:"))
Example #25
from datetime import datetime as dt

import io
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("Debug level logging turned on")

import requests
from requests.exceptions import HTTPError
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
from cachecontrol.heuristics import ExpiresAfter

sess = requests.session()
cached_sess = CacheControl(sess,
                           cache=FileCache('.web_cache'),
                           heuristic=ExpiresAfter(hours=1))

try:
    response = cached_sess.get(
        'https://rgtdb.com/events/json?search=&offset=0&limit=200'
    )  # Get 200 events. Should be about a week's worth of events
    response.raise_for_status()

except HTTPError as http_err:
    print(f'HTTP error occurred: {http_err}')
except Exception as err:
    print(f'Other error occurred: {err}')

logger.setLevel(logging.ERROR)
Example #26
import os
from functools import wraps

import requests
from flask import g, jsonify, request

from cachecontrol import CacheControl

from jose import jwt
from jose.exceptions import JWTError

_GOOGLE_OAUTH2_CERTS_URL = "https://www.googleapis.com/oauth2/v1/certs"
OAUTH2_PROVIDER = {
    'issuer': 'accounts.google.com',
    'audience': os.getenv('OAUTH2_CLIENT_ID'),
    'options': {
        'verify_at_hash': False
    }
}

sess = requests.session()
cached_sess = CacheControl(sess)


def current_user():
    return g.get('auth_user_payload')


def gsuite_authenticate(f):
    @wraps(f)
    def wrapper(*args, **kwargs):
        token = request.headers.get('Authorization', '')
        token = token.replace('Bearer ', '').replace('bearer ', '')
        if not token:
            return jsonify({
                'message': 'Unauthorized. No Authorization token provided.',
                'code': 401
Example #27
def cli(url, repositories, rows, minstar, description, token):
    if description and token:
        gh = github3.login(token=token)
        CacheControl(gh.session, cache=FileCache(".ghtopdep_cache"), heuristic=OneDayHeuristic())
        Repo = namedtuple("Repo", ["url", "stars", "description"])
    elif description and not token:
        click.echo("Please provide token")
    else:
        Repo = namedtuple("Repo", ["url", "stars"])

    destination = "repository"
    destinations = "repositories"
    if not repositories:
        destination = "package"
        destinations = "packages"
    page_url = "{0}/network/dependents?dependent_type={1}".format(url, destination.upper())

    repos = []
    more_than_zero_count = 0
    total_repos_count = 0
    spinner = Halo(text="Fetching information about {0}".format(destinations), spinner="dots")
    spinner.start()
    sess = requests.session()
    cached_sess = CacheControl(sess, cache=FileCache(".ghtopdep_cache"), heuristic=OneDayHeuristic())
    while True:
        response = cached_sess.get(page_url)
        parsed_node = HTMLParser(response.text)
        dependents = parsed_node.css(ITEM_SELECTOR)
        total_repos_count += len(dependents)
        for dep in dependents:
            repo_stars_list = dep.css(STARS_SELECTOR)
            # only for ghost or private? packages
            if repo_stars_list:
                repo_stars = dep.css(STARS_SELECTOR)[0].text().strip()
                repo_stars_num = int(repo_stars.replace(",", ""))
            else:
                continue

            if repo_stars_num != 0:
                more_than_zero_count += 1
            if repo_stars_num >= minstar:
                relative_repo_url = dep.css(REPO_SELECTOR)[0].attributes["href"]
                repo_url = "{0}{1}".format(GITHUB_URL, relative_repo_url)

                # can be listed same package
                is_already_added = already_added(repo_url, repos)
                if not is_already_added and repo_url != url:
                    if description:
                        repo_description = fetch_description(gh, relative_repo_url)
                        repos.append(Repo(repo_url, repo_stars_num, repo_description))
                    else:
                        repos.append(Repo(repo_url, repo_stars_num))

        node = parsed_node.css(NEXT_BUTTON_SELECTOR)
        if len(node) == 2:
            page_url = node[1].attributes["href"]
        elif len(node) == 0 or node[0].text() == "Previous":
            spinner.stop()
            break
        elif node[0].text() == "Next":
            page_url = node[0].attributes["href"]

    sorted_repos = sort_repos(repos, rows)
    show_result(sorted_repos, total_repos_count, more_than_zero_count, destination, destinations)
Example #28
# utf-8 encoding needed since it's used for the bot
# -*- coding: utf-8 -*-

import sys
import re
import requests
from cachecontrol import CacheControl
from bs4 import BeautifulSoup

session = requests.session()
cached_session = CacheControl(session)

# URL for the chatbot
URL = "https://kakko.pandorabots.com/pandora/talk?botid=f6a012073e345a08&skin=chat"

# Regex pattern to get the appropriate data
PATTERN = re.compile("</b>((.|\n)*?)<br>")


def ask_mitsuku(message):
    # Payload with message to POST
    payload = {'message': message}

    # Make POST request
    r = cached_session.post(URL, data=payload)

    # Parse data for Mitsuku's response
    soup = BeautifulSoup(r.content, 'html.parser')
    content = str(soup.p)
    pat = re.findall(PATTERN, content)
Example #29
#!/usr/bin/python3
import os, requests
from cachecontrol import CacheControl
import datetime
import hashlib, json
import zipfile
from fabricutil import *

from cachecontrol.caches import FileCache

forever_cache = FileCache('http_cache', forever=True)
sess = CacheControl(requests.Session(), forever_cache)


def mkdirs(path):
    if not os.path.exists(path):
        os.makedirs(path)


def filehash(filename, hashtype, blocksize=65536):
    hash = hashtype()
    with open(filename, "rb") as f:
        for block in iter(lambda: f.read(blocksize), b""):
            hash.update(block)
    return hash.hexdigest()


def get_maven_url(mavenKey, server, ext):
    mavenParts = mavenKey.split(":", 3)
    mavenVerUrl = server + mavenParts[0].replace(
        ".", "/") + "/" + mavenParts[1] + "/" + mavenParts[2] + "/"
Example #30
def get_frag_by_loc_from_osm(imtiles_file,
                             loci,
                             zoom_level=0,
                             padding=0,
                             tile_size=256,
                             no_cache=False):
    width = 360
    height = 180

    ims = []

    prefixes = ['a', 'b', 'c']
    prefix_idx = math.floor(random() * len(prefixes))
    osm_src = 'http://{}.tile.openstreetmap.org'.format(prefixes[prefix_idx])

    s = CacheControl(requests.Session())

    for locus in loci:
        id = locus[-1]

        if not no_cache:
            osm_snip = None
            try:
                osm_snip = np.load(BytesIO(rdb.get('osm_snip_%s' % id)))
                if osm_snip is not None:
                    ims.append(osm_snip)
                    continue
            except:
                pass

        start_lng = locus[0]
        end_lng = locus[1]
        start_lat = locus[2]
        end_lat = locus[3]

        if not is_within(start_lng + 180, end_lng + 180, end_lat + 90,
                         start_lat + 90, width, height):
            ims.append(None)
            continue

        # Get tile ids
        start1, start2 = get_tile_pos_from_lng_lat(start_lng, start_lat,
                                                   zoom_level)
        end1, end2 = get_tile_pos_from_lng_lat(end_lng, end_lat, zoom_level)

        xPad = padding * (end1 - start1)
        yPad = padding * (start2 - end2)

        start1 -= xPad
        end1 += xPad
        start2 += yPad
        end2 -= yPad

        tile_start1_id = math.floor(start1)
        tile_start2_id = math.floor(start2)
        tile_end1_id = math.floor(end1)
        tile_end2_id = math.floor(end2)

        start1 = math.floor(start1 * tile_size)
        start2 = math.floor(start2 * tile_size)
        end1 = math.ceil(end1 * tile_size)
        end2 = math.ceil(end2 * tile_size)

        tiles_x_range = range(tile_start1_id, tile_end1_id + 1)
        tiles_y_range = range(tile_start2_id, tile_end2_id + 1)

        # Make sure that no more than 6 standard tiles (256px) are loaded.
        if tile_size * len(tiles_x_range) > hss.SNIPPET_OSM_MAX_DATA_DIM:
            raise SnippetTooLarge()
        if tile_size * len(tiles_y_range) > hss.SNIPPET_OSM_MAX_DATA_DIM:
            raise SnippetTooLarge()

        # Extract image tiles
        tiles = []
        for y in tiles_y_range:
            for x in tiles_x_range:
                src = ('{}/{}/{}/{}.png'.format(osm_src, zoom_level, x, y))

                r = s.get(src)

                if r.status_code == 200:
                    tiles.append(Image.open(BytesIO(r.content)).convert('RGB'))
                else:
                    tiles.append(None)

        osm_snip = get_frag_from_image_tiles(tiles, tile_size, tiles_x_range,
                                             tiles_y_range, tile_start1_id,
                                             tile_start2_id, start1, end1,
                                             start2, end2)

        if not no_cache:
            with BytesIO() as b:
                np.save(b, osm_snip)
                rdb.set('osm_snip_%s' % id, b.getvalue(), 60 * 30)

        ims.append(osm_snip)

    return ims