Example #1
 def __init__(self, url, payload=None, post=False):
     bundleID = bundle()
     cacheName = volatile(bundleID + "_requests_cache")
     requests_cache.configure(cacheName)
     if payload:
         self.request = requests.get(url, params=payload) if not post else requests.post(url, data=payload)
     else:
         self.request = requests.get(url)
Example #2
    def __init__(self, name=CACHE_DEFAULT_NAME, *args, **kwargs):
        self._name = name
        self._stats = {"fetched": 0}

        if "cache_time" in kwargs:
            cache_time = kwargs["cache_time"] / 60
        else:
            cache_time = CACHE_DEFAULT_TIMEOUT / 60
        requests_cache.configure(self._name, expire_after=cache_time)
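
For context on the division by 60 above: the legacy requests_cache.configure() call used throughout these examples took expire_after in minutes, whereas current requests-cache releases expose install_cache() and expect seconds (or a timedelta). A rough sketch of an equivalent setup with the newer API, using an arbitrary cache name:

import requests_cache

# Roughly equivalent to configure(name, expire_after=cache_time) above,
# except that expire_after is given in seconds rather than minutes.
requests_cache.install_cache("example_cache", expire_after=300)  # 5 minutes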
Example #3
 def __init__(self, timeout_secs=5, raise_for_status=True):
     self._raise_for_status = raise_for_status
     self.timeout_secs = timeout_secs
     self.is_offline = os.getenv("OFFLINE", "false").lower() == "true"
     if self.is_offline:
         logging.info("Offline mode: Setting up offline store")
         requests_cache.configure('../offline_data/data_cache',
                                  backend='sqlite',
                                  expire_after=10000,
                                  allowable_methods=('GET', 'POST'))
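
A brief usage sketch of what a configuration like the one above enables, with a placeholder URL: once a response has been stored, repeating the identical request is answered from the SQLite file, and cached responses carry a from_cache attribute (the same flag Example #11 checks).

import requests
import requests_cache

requests_cache.configure('offline_data_cache', backend='sqlite',
                         allowable_methods=('GET', 'POST'))

r1 = requests.get('https://example.com/api')  # first call hits the network and is stored
r2 = requests.get('https://example.com/api')  # identical call is answered from the cache
print(getattr(r2, 'from_cache', False))       # True when the cached copy was used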
Example #4
 def __init__(self, username, password, caching=False):
     """This will create a new moodle handshake.
     """
     self.login(username, password)
     if caching:
         try:
             import requests_cache
             requests_cache.configure('.moodle_cache')
         except ImportError:
             logging.warning("requests_cache cannot be imported")
             logging.warning("Moodle will be requested without caching")
Example #5
    def __init__(self, name=CACHE_DEFAULT_NAME, *args, **kwargs):
        self._name = name
        self._stats = {
            'fetched': 0,
        }

        if 'cache_time' in kwargs:
            cache_time = kwargs['cache_time'] / 60
        else:
            cache_time = CACHE_DEFAULT_TIMEOUT / 60
        requests_cache.configure(self._name, expire_after=cache_time)
Example #6
 def __init__(self, username, password, caching=False):
     """This will create a new moodle handshake.
     """
     self.login(username, password)
     if caching:
         logging.info("caching enable")
         try:
             import requests_cache
             requests_cache.configure('.moodle_cache')
         except ImportError:
             logging.warning("requests_cache cannot be imported")
             logging.warning("Moodle will be requested without caching")
Example #7
 def test_expire_cache(self):
     delay = 1
     url = 'http://httpbin.org/delay/%s' % delay
     requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.001)
     t = time.time()
     r = requests.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
     time.sleep(0.5)
     t = time.time()
     r = requests.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
Example #8
    def test_post_params(self):
        # issue #2
        requests_cache.configure(CACHE_NAME, CACHE_BACKEND, allowable_methods=('GET', 'POST'))

        for _ in range(3):
            d = {'param1': 'test1'}
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)
            d = [('param1', 'test1'), ('param2', 'test2'), ('param3', 'test3')]
            res = sorted(self.post(d)['form'].items())
            self.assertEqual(res, d)
Example #9
def work_in(dirname=None):
    """
    Context manager version of os.chdir. When exited, returns to the working
    directory prior to entering.
    """
    curdir = os.getcwd()
    try:
        if dirname is not None:
            os.chdir(dirname)

        requests_cache.configure(expire_after=60 * 10 * 10)
        changes.initialise()

        yield

    finally:
        os.chdir(curdir)
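
The function above is a generator, so in its source module it is presumably decorated with contextlib.contextmanager. A simplified, self-contained sketch of how it is used (cache setup omitted, directory name is a placeholder):

import os
from contextlib import contextmanager

@contextmanager
def work_in(dirname=None):
    # Trimmed-down body of the example above.
    curdir = os.getcwd()
    try:
        if dirname is not None:
            os.chdir(dirname)
        yield
    finally:
        os.chdir(curdir)

# The working directory is switched inside the block and restored on exit;
# HTTP requests issued inside would go through the cache configured above.
with work_in('.'):
    print(os.getcwd())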
Example #10
        def do_tests_for(backend):
            requests_cache.configure(CACHE_NAME, backend)
            requests_cache.clear()
            n_threads = 10
            url = 'http://httpbin.org/get'
            def do_requests(url, params):
                for i in range(10):  # for testing write and read from cache
                    requests.get(url, params=params)

            for _ in range(20): # stress test
                threads = [Thread(target=do_requests, args=(url, {'param': i})) for i in range(n_threads)]
                for t in threads:
                    t.start()
                for t in threads:
                    t.join()

                for i in range(n_threads):
                    self.assert_(requests_cache.has_url('%s?param=%s' % (url, i)))
Example #11
    def test_post_data(self):
        # issue #2, raw payload
        requests_cache.configure(CACHE_NAME, CACHE_BACKEND,
                                 allowable_methods=('GET', 'POST'))
        d1 = json.dumps({'param1': 'test1'})
        d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
        d3 = str('some unicode data')
        if is_py3:
            bin_data = bytes('some binary data', 'utf8')
        else:
            bin_data = bytes('some binary data')

        for d in (d1, d2, d3):
            self.assertEqual(self.post(d)['data'], d)
            r = requests.post(httpbin('post'), data=d)
            self.assert_(hasattr(r, 'from_cache'))

        self.assertEqual(self.post(bin_data)['data'],
                         bin_data.decode('utf8'))
        r = requests.post(httpbin('post'), data=bin_data)
        self.assert_(hasattr(r, 'from_cache'))
Example #12
def get_stages(workflow_status, workflow_name, protocol_name, stage_name, use_cache):
    """Expands information about all stages in the filtered workflows"""
    workflow_pattern = re.compile(workflow_name)
    protocol_pattern = re.compile(protocol_name)
    stage_pattern = re.compile(stage_name)

    if use_cache:
        requests_cache.configure("workflow-info")
    session = ClaritySession.create(None)
    workflows = [workflow for workflow in session.api.get_workflows()
                 if workflow.status == workflow_status and workflow_pattern.match(workflow.name)]

    print("workflow\tprotocol\tstage\turi")
    for workflow in workflows:
        for stage in workflow.api_resource.stages:
            if not protocol_pattern.match(stage.protocol.name) or not stage_pattern.match(stage.name):
                continue
            try:
                print("\t".join([stage.workflow.name, stage.protocol.name, stage.name, stage.uri]))
            except AttributeError as e:
                print("# ERROR workflow={}: {}".format(workflow.uri, e.message))
Example #13
def turn_on_request_caching():
    import requests_cache
    requests_cache.configure(os.path.join(os.path.expanduser('~'), 
                                          '.fundamentals_test_requests'))
Example #14
import json

import requests
import requests_cache

from lassie import Lassie


parser = argparse.ArgumentParser(description='Separates URLs with 200 status code from those without.')
parser.add_argument('bmfile', help='Bookmarks file in JSON list format.')
args = parser.parse_args()

not_ok = []
bookmarks = []

requests_cache.configure('../cache/requests')
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/43.0.2357.130 Chrome/43.0.2357.130 Safari/537.36'
headers = {'User-Agent': user_agent}
l = Lassie()
l.request_opts = {'headers': headers}
webclient = requests.Session()
webclient.headers.update(headers)


with open(args.bmfile, 'r') as f:
    data = json.load(f)

for i, b in enumerate(data['bookmarks']):
    url = b['url']
    if not url or not url.startswith(('http', 'https')):
        continue
Example #15
#!/usr/bin/env python
# coding: utf-8
# Convert browser bookmarks file to json for further processing.
import sys, json, requests, requests_cache, extraction
from bs4 import BeautifulSoup


def get_extract(html, url):
    e = extraction.Extractor().extract(html, source_url=url)
    return {"title": e.title, "description": e.description, "image": e.image, "url": e.url if e.url else url}


requests_cache.configure("urls")
alltags = []
bmfile = sys.argv[1]
html = None
with open(bmfile, "r") as f:
    html = f.read()

not_ok = {}
links = {}
exceptions = {"request": [], "extract": []}
soup = BeautifulSoup(html)
for a in soup.find_all("a"):
    url = a.get("href")
    print("Processing url %s" % url)

    try:
        response = requests.get(url, verify=False, timeout=30)
    except Exception, err:
        exceptions["request"].append(url)
Example #16
def turn_on_request_caching():
    import requests_cache
    requests_cache.configure(
        os.path.join(os.path.expanduser('~'), '.fundamentals_test_requests'))
Example #17
# -*- coding: utf-8 -*-
"""
googleapis_translate

Refer to mgoogle_translate.py
"""
import logging
from nose.tools import (eq_, with_setup)

import requests

import requests_cache
requests_cache.configure(allowable_methods=(
    'GET',
    'POST',
))  # noqa POST does not seem to work
requests_cache.configure(
    cache_name='baidu_cache',
    expire_after=604800,
    allowable_methods=('GET', 'POST'))  # noqa a week: 7*24*3600 = 604800
requests_cache.install_cache('googleapis_cache')

HEADERS = {
    "Accept":
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, sdch, br",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "User-Agent":
    "Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1",
    "Referer": "http://wap.baidu.com"
}
Example #18
import gevent
from gevent import monkey; monkey.patch_socket()
from gevent.queue import Queue
from networkx import DiGraph, shortest_path
from util import pairwise, process_text, term_conditioner, gevent_throttle
import requests
import requests_cache


# request-level cache using HTTP Cache Control
requests_cache.configure('spotify_cache')
# term-level cache
_Q_CACHE = {}

def search_track_url(q, page):
    params = dict(q=q.encode('UTF-8'), page=page)
    return requests.Request("http://ws.spotify.com/search/1/track.json",
            params=params).full_url


# Spotify MetaAPI dictates max rate of 10 requests per second
throttle = gevent_throttle(10)(lambda:None)

# TODO timeout and exception handling
# TODO cache only useful stuff | if the track name is a substring of a poem
# TODO there could be multiple title matches - perhaps, should
# make them all available - maybe even tweak by popularity or genre
def search_track(q, page=1):
    q = term_conditioner(q)
    has_tracks = False
    if not _Q_CACHE.has_key(q):
Example #19
import json
import requests, requests_cache
from geojson import GeoJSONCoder

# select basic sqlite caching backend
requests_cache.configure('http_cache')

class Resource(object):
    def __init__(self, uri, name=None, package=None):
        self.uri = uri
        self.name = name
        self.package = package
        self.lbl = ''.join([self.name, ' (', self.package, ')'])

    def __unicode__(self):
        # TODO not working?
        return self.lbl

    # TODO caching does not take into account Accept-headers!!!

    def csv(self):
        print self.uri
        r = requests.get(self.uri, headers={'Accept' : 'text/csv'})
        return r.text

    def json(self, syntax='plain'):
        r = requests.get(self.uri, headers={'Accept' : 'application/json'})
        data = r.json

        if syntax == 'geo':
            G = GeoJSONCoder()
Example #20
import json
import requests, requests_cache
from geojson import GeoJSONCoder

# select basic sqlite caching backend
requests_cache.configure('http_cache')


class Resource(object):
    def __init__(self, uri, name=None, package=None):
        self.uri = uri
        self.name = name
        self.package = package
        self.lbl = ''.join([self.name, ' (', self.package, ')'])

    def __unicode__(self):
        # TODO not working?
        return self.lbl

    # TODO caching does not take into account Accept-headers!!!

    def csv(self):
        print self.uri
        r = requests.get(self.uri, headers={'Accept': 'text/csv'})
        return r.text

    def json(self, syntax='plain'):
        r = requests.get(self.uri, headers={'Accept': 'application/json'})
        data = r.json

        if syntax == 'geo':
Example #21
def turn_on_request_caching():
    import requests_cache
    requests_cache.configure(cache_name='vector_cache_test')
Example #22
 def handle(self, *args, **options):
     import requests_cache
     requests_cache.configure("update-social")
     self.logger = logging.getLogger(__name__)
     self.updater = FeedUpdater(self.logger)
     self.updater.update_feeds()
Example #23
        'rawdate':item.xpath("self::*//p[@class='date']/text()")[0].strip(),
        'summary':htmlize(''.join([lxml.html.tostring(x) for x in item.xpath("self::*//div[@class='item-summary']/p[not(@class)]")]))
      }
      builder.append(data)
    scraperwiki.sqlite.save(table_name = '_index', data = builder, unique_keys = ['url'])
    print len(builder)
    if len(builder) != 10:
      break
try:
  scraperwiki.sqlite.execute("alter table _index add column date")
  scraperwiki.sqlite.execute("alter table _index add column markdown")
except Exception,e:
  print repr(e)


requests_cache.configure()
for i in scraperwiki.sqlite.select("url, rawdate from _index"):
  print i['url']
  r=requests.get(i['url'])
  html=r.text
  root=lxml.html.fromstring(html)
  fragment = root.xpath("//div[@class='news-details-body']")[0]
  markdown = htmlize(lxml.html.tostring(fragment))
  scraperwiki.sqlite.execute("update _index set markdown = ? where url = ?",(markdown, i['url']))
  
  nicedate = parsedate(i['rawdate'])
  nicerdate= datetime.datetime.strftime( datetime.datetime.strptime(nicedate, '%Y-%m-%d'), '%d-%b-%Y')
  if nicerdate[0]=='0': nicerdate=nicerdate[1:]
  scraperwiki.sqlite.execute("update _index set date = ? where url = ?",(nicerdate, i['url']))
  scraperwiki.sqlite.commit()
Example #24
    register_extensions(app)
    register_blueprints(app)
    register_errorhandlers(app)
    return app


def register_extensions(app):
    assets.init_app(app)
    bcrypt.init_app(app)
    cache.init_app(app)
    debug_toolbar.init_app(app)
    return None


def register_blueprints(app):
    app.register_blueprint(public.views.blueprint)
    return None


def register_errorhandlers(app):
    def render_error(error):
        # If a HTTPException, pull the `code` attribute; default to 500
        error_code = getattr(error, 'code', 500)
        return render_template("{0}.html".format(error_code)), error_code
    for errcode in [401, 404, 500]:
        app.errorhandler(errcode)(render_error)
    return None


requests_cache.configure('/tmp/cache',  expire_after=600)
Example #25
 def __init__(self, logger=None, use_cache=False):
     self.logger = logger or logging.getLogger(__name__)
     if use_cache:
         cache_name = "process-types"
         requests_cache.configure(cache_name)
Example #26
    DATA_TYPES = ("elections", "parties", "candidates", "prebudgets", 
                  "prebudgets_csv")

    for imp in options.imports:
        if imp not in DATA_TYPES:
            sys.stderr.write("Unsupported data type '%s'.\nSupported data types:\n" % imp)
            for dt in DATA_TYPES:
                sys.stderr.write("  %s\n" % dt)
            exit(1)

    http = HttpFetcher()

    if not options.disable_cache:
        cache_dir = ".cache"
        http.set_cache_dir(cache_dir)
        requests_cache.configure("importers")

    if options.django:
        from importers.backends.django import DjangoBackend

        # We need to start logging after Django initializes
        # because Django can be configured to reset logging.
        logger = init_logging(debug=options.verbose)
        backend = DjangoBackend(logger, replace=options.replace)
    else:
        logger = init_logging(debug=options.verbose)
        backend = Backend(logger, replace=options.replace)

    if not options.disable_cache:
        logger.debug("Setting up HTTP cache in %s" % cache_dir)
    else:
Example #27
"""
import sys
import os
import pprint
import argparse
import requests
import requests.auth
import simplejson as json

try:
    CACHE_NAME = __file__ + "-cache"
    # Use requests-cache if available.  We cache successful
    # requests for 1 minute, because requests_cache only supports
    # cache durations in multiples of a minute...
    import requests_cache
    requests_cache.configure(cache_name=CACHE_NAME, expire_after=1)
except ImportError:
    pass


def twitter_oauth():
    """Return OAUTH authentication to be used in by requests library."""
    # Must supply twitter account credentials in environment variables.
    consumer_key = unicode(os.environ['CONSUMER_KEY'])
    consumer_secret = unicode(os.environ['CONSUMER_SECRET'])
    oauth_token = unicode(os.environ['ACCESS_KEY'])
    oauth_token_secret = unicode(os.environ['ACCESS_SECRET'])
    return requests.auth.OAuth1(consumer_key,
                                consumer_secret,
                                oauth_token,
                                oauth_token_secret,
Example #28
 def __init__(self, logger=None, use_cache=False):
     self.logger = logger or logging.getLogger(__name__)
     if use_cache:
         cache_name = "process-types"
         requests_cache.configure(cache_name)
Example #29
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU Affero General Public License for more details.
##
##  You should have received a copy of the GNU Affero General Public License
##  along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Provides a simple abstraction against the OSM API"""

import requests
import requests_cache
import logging

logging.basicConfig(level=logging.DEBUG)

rs = requests.session(headers={'user-agent': 'changemonger/0.0.1'})
requests_cache.configure('osm_cache')

server = 'api.openstreetmap.org'

def getNode(id, version = None):
    id = str(id)
    if version:
        url = "http://%s/api/0.6/node/%s/%s" % (server, id, str(version))
    else:
        url = "http://%s/api/0.6/node/%s" % (server, id)
    logging.debug("Retrieving %s for node %s version %s" % (
        url, id, version))
    r = rs.get(url)
    r.raise_for_status()
    return r.content
Example #30
 def setUp(self):
     requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
     requests_cache.clear()
Example #31
    print url
    r = requests.get(url)
    doc = html.fromstring(r.text)
    el_list = doc.xpath('//a[@name]')
    members = []
    for idx, el in enumerate(el_list):
        # The first council member is sometimes encoded differently...
        if idx == 0 and el.getnext() != None:
            name = el.getnext().text_content()
        else:
            name = el.tail
        name = name.strip()
        members.append((name, party))
    return members

requests_cache.configure('jyvaskyla')

members = []
BASE_URL = 'http://www.jyvaskyla.fi/hallinto/valtuusto/valtuusto09'

r = requests.get(BASE_URL)
doc = html.fromstring(r.text)
# We will be fetching linked pages, so relative paths must be
# converted into absolute URLs.
doc.make_links_absolute(BASE_URL)

# Find the p element that contains the text "Valtuustoryhmät"
el = doc.xpath(u"//h2[contains(., 'Valtuustoryhmät')]")[0]
# The links to the council groups follow
party_links = el.xpath("following-sibling::p/a")
for link_el in party_links:
Example #32
 def setUp(self):
     requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND)
     requests_cache.clear()
Example #33
# -*- coding: utf-8 -*-
# Get programming languages data from freebase and save in json format 
# appropriate for generating a network graph with sigma.js.
import requests, requests_cache, json

requests_cache.configure('freebase')
langs = []
paradigms = {}

with open('query.json') as f:
    query = f.read()

r = requests.get('https://www.googleapis.com/freebase/v1/mqlread', params={'query': query})
res = json.loads(r.text)['result']
for index, lang in enumerate(res):

    paras = []
    for i in lang['language_paradigms']:
        pid = i['id']
        name = i['name']
        paras.append({'id': pid, 'name': name})
        if pid not in paradigms:
            paradigms[pid] = {'count': 1, 'name': name, 'id': pid}
        else:
            paradigms[pid]['count'] += 1

    langs.append({
        'index': index,
        'size': len(lang['influenced']),
        'influenced': [{'id': i['id'], 'name': i['name']} for i in lang['influenced']],
        'paradigms': paras,
Example #34
 def test_unregistered_backend(self):
     with self.assertRaises(ValueError):
         requests_cache.configure(CACHE_NAME, backend='nonexistent')
Example #35
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import requests
import requests_cache
from lxml import html

from utils import ScrapeError, submit_council_members
from finland import PARTIES

requests_cache.configure('helsinki')

r = requests.get('http://www.hel.fi/hki/helsinki/fi/P__t_ksenteko+ja+hallinto/P__t_ksenteko/Kaupunginvaltuusto/Valtuuston+j_senet')
doc = html.fromstring(r.text)

# Find the p element that contains the text "Kaupunginvaltuuston jäsenet"
el = doc.xpath(u"//p/strong[contains(., 'Kaupunginvaltuuston jäsenet')]")[0]
# Find the first table element following the p element
table_el = el.xpath("../following-sibling::table")[0]
rows = table_el.xpath("tr")

members = []

# The first row is header, skip it
for row in rows[1:]:
    el = row.xpath("td")[0]
    # Some of the elements have multiple lines (with the email address
    # on the 2nd line). Take only the first line.
    s = el.text_content().split('\n')[0].strip()
    if not s:
        continue
Example #36
 def handle(self, *args, **options):
     import requests_cache
     requests_cache.configure("update-social")
     self.logger = logging.getLogger(__name__)
     self.updater = FeedUpdater(self.logger)
     self.updater.update_feeds()
Example #37
 def __init__(self, session, use_cache):
     self.session = session
     if use_cache:
         # TODO: The cache is being ignored 
         cache_name = "reporting-svc-cache"
         requests_cache.configure(cache_name)
Example #38
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import requests
import requests_cache
from lxml import html

from utils import ScrapeError, submit_council_members
from finland import PARTIES

requests_cache.configure('helsinki')

r = requests.get(
    'http://www.hel.fi/hki/helsinki/fi/P__t_ksenteko+ja+hallinto/P__t_ksenteko/Kaupunginvaltuusto/Valtuuston+j_senet'
)
doc = html.fromstring(r.text)

# Find the p element that contains the text "Kaupunginvaltuuston jäsenet"
el = doc.xpath(u"//p/strong[contains(., 'Kaupunginvaltuuston jäsenet')]")[0]
# Find the first table element following the p element
table_el = el.xpath("../following-sibling::table")[0]
rows = table_el.xpath("tr")

members = []

# The first row is header, skip it
for row in rows[1:]:
    el = row.xpath("td")[0]
    # Some of the elements have multiple lines (with the email address
    # on the 2nd line). Take only the first line.
    s = el.text_content().split('\n')[0].strip()
Example #39
# -*- coding: utf-8 -*-
import requests, requests_cache, json, re
from bs4 import BeautifulSoup

requests_cache.configure('delicious_links')
urllist = []
re_ws = re.compile(r'\s+')

def cleanws(s):
    return re.sub(re_ws, ' ', s)

with open('data.json') as f:
    bookmarks = json.load(f)

for p in bookmarks['posts']:
    url = p['post']['href']
    r = requests.get(url)
    if 200 != r.status_code:
        continue

    soup = BeautifulSoup(r.text)

    if not soup.title:
        continue
    title = soup.title.text

    try:
        description = str(soup.find_all('meta', attrs={'name':'description'})[0]['content'])
    except:
        if soup.p:
            description = soup.p.text
Example #40
import requests
import requests_cache
import bs4
import datetime
import json
from icalendar import Calendar, Event  # icalendar==3.9.0

requests_cache.configure('cache_database', expire_after=60*60)

headers = {'Content-Type': 'text/calendar; charset=utf-8',
    'Content-Disposition': 'inline; filename=calendar.ics'}


def generate_calendar():
    req = requests.get('http://bezpieczna.um.warszawa.pl/imprezy-masowe/zgromadzenia')
    soup = bs4.BeautifulSoup(req.text)
    trs = soup.find('table', attrs={'class': 'ViewsTable'}).findAll('tr')
    label = [x.text.strip() for x in trs[0].findAll('th')]

    cal = Calendar()
    cal.add('prodid', '-//Zgromadzenia publiczne w Warszawie//jawne.info.pl//')
    cal.add('version', '0.1.0')

    for tr in trs[1:]:
        date_string = tr.find('td').text.strip()
        date = datetime.datetime.strptime(date_string, '%Y-%m-%d').date()

        values = [x.text.strip() for x in tr.findAll('td')]
        text = json.dumps(dict(zip(label, values)), indent=4)

        event = Event()
Example #41
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time

import requests
from requests import async
import requests_cache

requests_cache.configure("example_cache")


def main():
    # Once cached, delayed page will be taken from cache
    # redirects also handled
    for i in range(5):
        requests.get("http://httpbin.org/delay/2")
        r = requests.get("http://httpbin.org/redirect/5")
        print(r.text)

    # What about async? It's also supported!
    rs = [async.get("http://httpbin.org/delay/%s" % i) for i in range(5)]
    for r in async.map(rs):
        print(r.text)

    # And if we need to get fresh page or don't want to cache it?
    with requests_cache.disabled():
        print(requests.get("http://httpbin.org/ip").text)

    # Debugging info about cache
    print(requests_cache.get_cache())
Example #42
A Python client for the New York Times Campaign Finance API
"""
__author__ = "Derek Willis ([email protected])"
__version__ = "0.4.0"

import os
import requests
import requests_cache

__all__ = ('NytCampfin', 'NytCampfinError', 'NytNotFoundError')

DEBUG = False

CURRENT_CYCLE = 2012

requests_cache.configure(expire_after=5)

# Error classes


class NytCampfinError(Exception):
    """
    Exception for New York Times Campaign Finance API errors
    """


class NytNotFoundError(NytCampfinError):
    """
    Exception for things not found
    """
Example #43
"""
import sys
import os
import pprint
import argparse
import requests
import requests.auth
import simplejson as json

try:
    CACHE_NAME = __file__ + "-cache"
    # Use requests-cache if available.  We cache successful
    # requests for 1 minute, because requests_cache only supports
    # cache durations in multiples of a minute...
    import requests_cache
    requests_cache.configure(cache_name=CACHE_NAME, expire_after=1)
except ImportError:
    pass

def twitter_oauth():
    """Return OAUTH authentication to be used in by requests library."""
    # Must supply twitter account credentials in environment variables.
    consumer_key = unicode(os.environ['CONSUMER_KEY'])
    consumer_secret = unicode(os.environ['CONSUMER_SECRET'])
    oauth_token = unicode(os.environ['ACCESS_KEY'])
    oauth_token_secret = unicode(os.environ['ACCESS_SECRET'])
    return requests.auth.OAuth1(
        consumer_key,
        consumer_secret,
        oauth_token,
        oauth_token_secret,
Example #44
def main():
    # argument parsing
    args = parse_args()

    if args.API_KEY and args.API_SECRET:
        network = lastfmconnect(api_key=args.API_KEY, api_secret=args.API_SECRET)
    else:
        network = lastfmconnect()

    if not args.columns:
        args.columns = args.rows

    # cache for python-requests
    if not args.disable_cache:
        cache_folder = os.path.expanduser("~/.local/share/lastfm_cg/")
        if not os.path.exists(cache_folder):
            logger.info("Cache folder not found. Creating %s", cache_folder)
            os.makedirs(cache_folder)
            if not os.path.isfile(cache_folder + "lastfm_cg_cache.sqlite"):
                original_folder = os.getcwd()
                os.chdir(cache_folder)
                requests_cache.install_cache("lastfm_cg_cache")
                os.chdir(original_folder)
        requests_cache.configure(os.path.expanduser(cache_folder + "lastfm_cg_cache"))

    if args.username:
        users = [x.strip() for x in args.username.split(",")]
    else:
        logger.error("Use the -u/--username flag to set an username.")
        exit()

    if args.timeframe not in TIMEFRAME_VALUES:
        logger.error(
            "Incorrect value %s for timeframe. Accepted values : %s",
            args.columns,
            TIMEFRAME_VALUES,
        )
        exit()

    for username in users:
        user = network.get_user(username)

        nb_covers = args.rows * args.columns if not args.top100 else 100
        list_covers = lastfm_utils.get_list_covers(
            user=user, nb_covers=nb_covers, timeframe=args.timeframe
        )
        img = (
            image_utils.create_image(list_covers=list_covers, nb_columns=args.columns)
            if not args.top100
            else image_utils.create_top100_image(list_covers=list_covers)
        )

        # export image
        if args.output_filename:
            export_filename = args.output_filename
        elif args.top100:
            export_filename = (
                f"{args.timeframe}_{username}_top100_{int(time.time())}.png"
            )
        else:
            export_filename = f"{args.timeframe}_{username}_{args.columns*args.rows:004}_{int(time.time())}.png"
        img.save(export_filename)

    logger.info("Runtime : %.2f seconds." % (time.time() - temps_debut))
Example #45
A Python client for the New York Times Campaign Finance API
"""
__author__ = "Derek Willis ([email protected])"
__version__ = "0.4.0"

import os
import requests
import requests_cache

__all__ = ('NytCampfin', 'NytCampfinError', 'NytNotFoundError')

DEBUG = False

CURRENT_CYCLE = 2012

requests_cache.configure(expire_after=5)

# Error classes

class NytCampfinError(Exception):
    """
    Exception for New York Times Campaign Finance API errors
    """

class NytNotFoundError(NytCampfinError):
    """
    Exception for things not found
    """

# Clients
Example #46
import os

import requests_cache
import reversion
import time
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from gusregon import GUS
from tqdm import tqdm

from epuap_watchdog.institutions.models import Institution, REGON, REGONError
from epuap_watchdog.institutions.utils import normalize_regon

requests_cache.configure()


class Command(BaseCommand):
    help = "Command to import REGON database."

    def add_arguments(self, parser):
        parser.add_argument('--comment', required=True, help="Description of changes eg. data source description")
        parser.add_argument('--update', dest='update', action='store_true')
        parser.add_argument('--institutions_id', type=int, nargs='+', help="Institution IDs updated")
        parser.add_argument('--no-progress', dest='no_progress', action='store_false')

    def handle(self, comment, institutions_id, update, no_progress, *args, **options):
        gus = GUS(api_key=settings.GUSREGON_API_KEY, sandbox=settings.GUSREGON_SANDBOX)
        if settings.GUSREGON_SANDBOX is True:
            self.stderr.write("You are using sandbox mode for the REGON database. Data may be incorrect. "
                              "Set the environemnt variable GUSREGON_SANDBOX and GUSREGON_API_KEY correctly.")
Example #47
from time import sleep
from random import random

import js2py  # type: ignore

import requests_cache  # type: ignore

LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())

HOME_FOLDER = Path.home()
__FILE__ = globals().get('__file__') or 'test'
CACHE_NAME = (Path(HOME_FOLDER) / (Path(__FILE__)).stem).as_posix()
EXPIRE_AFTER = 3600

requests_cache.configure(cache_name=CACHE_NAME, expire_after=36000)  # 10 hrs

URL = 'http://translate.google.cn/translate_a/single'

TL = \
    """function RL(a, b) {
        var t = "a";
        var Yb = "+";
        for (var c = 0; c < b.length - 2; c += 3) {
            var d = b.charAt(c + 2),
            d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
            d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
            a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
        }
        return a
    }
Example #48
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU Affero General Public License for more details.
##
##  You should have received a copy of the GNU Affero General Public License
##  along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Provides a simple abstraction against the OSM API"""

import requests
import requests_cache
import logging

logging.basicConfig(level=logging.DEBUG)

rs = requests.session(headers={'user-agent': 'changemonger/0.0.1'})
requests_cache.configure('osm_cache')

server = 'api.openstreetmap.org'

def getNode(id, version = None):
    id = str(id)
    if version:
        url = "http://%s/api/0.6/node/%s/%s" % (server, id, str(version))
    else:
        url = "http://%s/api/0.6/node/%s" % (server, id)
    logging.debug("Retrieving %s for node %s version %s" % (
        url, id, version))
    r = rs.get(url)
    r.raise_for_status()
    return r.text
Example #49
# Convert browser bookmarks file to json for further processing.
import sys, json, requests, requests_cache, extraction
from bs4 import BeautifulSoup


def get_extract(html, url):
    e = extraction.Extractor().extract(html, source_url=url)
    return {
        'title': e.title,
        'description': e.description,
        'image': e.image,
        'url': e.url if e.url else url
    }


requests_cache.configure('urls')
alltags = []
bmfile = sys.argv[1]
html = None
with open(bmfile, 'r') as f:
    html = f.read()

not_ok = {}
links = {}
exceptions = {'request': [], 'extract': []}
soup = BeautifulSoup(html)
for a in soup.find_all('a'):
    url = a.get('href')
    print('Processing url %s' % url)

    try: