Exemplo n.º 1
0
import zlib
import base64
import pika
import numpy as np
import sklearn.utils as sku
import pandas_datareader as pdr
from sklearn.model_selection import TimeSeriesSplit
from threading import Thread, Lock

# Log line layout: timestamp, thread name right-aligned to 11 columns,
# then the level and the message.
FORMAT = '%(asctime)s -%(threadName)11s-\t[%(levelname)s]:%(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger()  # root logger
logger.info("Start")

# HTTP session whose responses are cached in a local SQLite file for 5 days.
session = requests_cache.CachedSession(cache_name='cache',
                                       backend='sqlite',
                                       expire_after=datetime.timedelta(days=5))
# First HTML table on the Wikipedia page, using its first row as the header.
symbols_table = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies", header=0)[0]
# Only the entries from position 400 onwards of the "Ticker symbol" column
# are kept.
# NOTE(review): this relies on the page still exposing a column with that
# exact name - confirm.
symbols = list(symbols_table.loc[:, "Ticker symbol"])[400:]
logger.info("Symbols ready")

# Accumulators filled by the download loop that follows.
unprocessed_symbols = []
data_raw = {}  # keyed by ticker symbol
for symbols_index, symbol in enumerate(symbols):
    try:
        data_raw[symbol] = pdr.quandl.QuandlReader(
            symbol,
            start=datetime.datetime(2010, 1, 1),
            end=datetime.datetime(2015, 1, 1),
            session=session,
Exemplo n.º 2
0
# Add-on handles and per-user storage location.
addon = xbmcaddon.Addon()
plugin = Plugin()
plugin.name = addon.getAddonInfo('name')
user_agent = 'Dalvik/2.1.0 (Linux; U; Android 5.1.1; AFTS Build/LVY48F)'
# NOTE(review): calling .decode() on the translated path only works where
# translatePath returns a byte string (Python 2 style) - confirm the target
# runtime.
USER_DATA_DIR = xbmc.translatePath(
    addon.getAddonInfo('profile')).decode('utf-8')
# The 'cache_time' add-on setting is interpreted as a number of hours.
CACHE_TIME = int(addon.getSetting('cache_time'))
CACHE_FILE = os.path.join(USER_DATA_DIR, 'cache')
expire_after = timedelta(hours=CACHE_TIME)

# The cache file lives in the profile directory, so it must exist first.
if not os.path.exists(USER_DATA_DIR):
    os.makedirs(USER_DATA_DIR)

# allowable_methods is a collection of HTTP method names. A bare 'GET' string
# only worked through substring matching, so pass a proper one-element tuple.
s = requests_cache.CachedSession(CACHE_FILE,
                                 allowable_methods=('GET',),
                                 expire_after=expire_after,
                                 old_data_on_error=True)
s.headers.update({'User-Agent': user_agent})

# Service endpoints; the '{0}' / '{1}' placeholders are filled via str.format.
data_url = 'http://swiftstreamz.com/SwiftLive/swiftlive.php'
api_url = 'http://swiftstreamz.com/SwiftLive/api.php'
list_url = 'http://swiftstreamz.com/SwiftLive/api.php?cat_id={0}'
thumb_url = 'http://swiftstreamz.com/SwiftLive/images/thumbs/{0}|User-Agent={1}'

# Credentials (masked in this copy of the source).
USER = '******'
PASS = '******'

# Account details as entered in the add-on settings.
Email = plugintools.get_setting("Email")
Password = plugintools.get_setting("Password")

if Email == "":
Exemplo n.º 3
0
def clear_cache():
    """Purge the 'cdragon_cache' requests cache on both sides of a ``yield``.

    The generator opens the cache with a zero expiry and removes expired
    responses, hands control back at the ``yield``, and repeats the same
    purge when it is resumed.
    """
    cache_session = requests_cache.CachedSession('cdragon_cache',
                                                 expire_after=0)
    purge = cache_session.remove_expired_responses
    purge(expire_after=0)
    yield
    purge(expire_after=0)
Exemplo n.º 4
0
    def __init__(
        self,
        interactive=False,
        select_first=False,
        cache=True,
        banners=False,
        actors=False,
        custom_ui=None,
        language=None,
        search_all_languages=False,
        apikey=None,
        username=None,
        userkey=None,
        forceConnect=None,  # noqa
        dvdorder=False,
    ):
        """interactive (True/False):
            When True, the built-in console UI is used to select the correct show.
            When False, the first search result is used.

        select_first (True/False):
            Automatically selects the first series search result (rather
            than showing the user a list of more than one series).
            Is overridden by interactive = False, or specifying a custom_ui

        cache (True/False/str/requests_cache.CachedSession):

            Retrieved URLs can be persisted to disc.

            True/False enable or disable default caching. Passing
            string specifies the directory where to store the
            "tvdb.sqlite3" cache file. Alternatively a custom
            requests.Session instance can be passed (e.g maybe a
            customised instance of `requests_cache.CachedSession`)

        banners (True/False):
            Retrieves the banners for a show. These are accessed
            via the _banners key of a Show(), for example:

            >>> Tvdb(banners=True)['scrubs']['_banners'].keys()
            [u'fanart', u'poster', u'seasonwide', u'season', u'series']

        actors (True/False):
            Retrieves a list of the actors for a show. These are accessed
            via the _actors key of a Show(), for example:

            >>> t = Tvdb(actors=True)
            >>> t['scrubs']['_actors'][0]['name']
            u'John C. McGinley'

        custom_ui (tvdb_ui.BaseUI subclass):
            A callable subclass of tvdb_ui.BaseUI (overrides interactive option)

        language (2 character language abbreviation):
            The 2 character language abbreviation used for the returned data,
            and is also used when searching. For a complete list, call
            the `Tvdb.available_languages` method.
            Default is "en" (English).

        search_all_languages (True/False):
            By default, Tvdb will only search in the language specified using
            the language option. When this is True, it will search for the
            show in any language

        apikey (str/unicode):
            Your API key for TheTVDB. You can easily register a key within
            a few minutes:
            https://thetvdb.com/api-information
            When omitted, the TVDB_API_KEY environment variable is used.

        username (str/unicode or None):
            Specify a user account to use for actions which require
            authentication (e.g marking a series as favourite, submitting ratings)

        userkey (str/unicode, or None):
            User authentication key relating to "username".

        forceConnect:
            DEPRECATED. Disabled the timeout-throttling logic. Now has no function

        dvdorder (True/False):
            Stored as-is in config['dvdorder'].

        Raises ValueError when no API key is available, or when the cache
        argument is neither a bool, a string, nor an object with a ``get``
        attribute.
        """

        if forceConnect is not None:
            warnings.warn(
                "forceConnect argument is deprecated and will be removed soon",
                category=DeprecationWarning,
            )

        self.shows = ShowContainer()  # Holds all Show classes
        self.corrections = {}  # Holds show-name to show_id mapping

        self.config = {}

        # Ability to pull key from env-var, mostly for unit-tests
        _test_key = os.getenv('TVDB_API_KEY')
        if apikey is None and _test_key is not None:
            apikey = _test_key

        if apikey is None:
            raise ValueError((
                "apikey argument is now required - an API key can be easily registered "
                "at https://thetvdb.com/api-information"))
        self.config['auth_payload'] = {
            "apikey": apikey,
            "username": username or "",
            "userkey": userkey or "",
        }

        self.config['custom_ui'] = custom_ui

        self.config['interactive'] = interactive  # prompt for correct series?

        self.config['select_first'] = select_first

        self.config['search_all_languages'] = search_all_languages

        self.config['dvdorder'] = dvdorder

        if cache is True or isinstance(cache, str):
            # Default location and explicit directory differ only in where
            # the SQLite file goes; the session itself is built identically.
            if cache is True:
                cache_name = self._getTempDir()
                LOG.debug("Caching using requests_cache to %s", cache_name)
            else:
                LOG.debug(
                    "Caching using requests_cache to specified directory %s",
                    cache)
                cache_name = os.path.join(cache, "tvdb_api")
            self.session = requests_cache.CachedSession(
                expire_after=21600,  # 6 hours
                backend='sqlite',
                cache_name=cache_name,
                include_get_headers=True,
            )
            # Swap in the module's own cache-key function for this cache.
            self.session.cache.create_key = types.MethodType(
                create_key, self.session.cache)
            self.session.remove_expired_responses()
            # Previously only set for cache=True; a path-based cache is
            # just as enabled.
            self.config['cache_enabled'] = True
        elif cache is False:
            LOG.debug("Caching disabled")
            self.session = requests.Session()
            self.config['cache_enabled'] = False
        else:
            LOG.debug("Using specified requests.Session")
            self.session = cache
            try:
                self.session.get
            except AttributeError:
                raise ValueError((
                    "cache argument must be True/False, string as cache path "
                    "or requests.Session-type object (e.g from requests_cache.CachedSession)"
                ))
            # Best effort: a caller-supplied session counts as caching only
            # when it is a requests_cache session.
            self.config['cache_enabled'] = isinstance(
                cache, requests_cache.CachedSession)

        self.config['banners_enabled'] = banners
        self.config['actors_enabled'] = actors

        self.config['language'] = 'en' if language is None else language

        # The following url_ configs are based on
        # https://api.thetvdb.com/swagger
        # Each template is pre-filled with the base/api URL here; the doubled
        # %% leaves a %s placeholder for the caller to fill in later.
        self.config['base_url'] = "http://thetvdb.com"
        self.config['api_url'] = "https://api.thetvdb.com"

        self.config[
            'url_getSeries'] = u"%(api_url)s/search/series?name=%%s" % self.config

        self.config[
            'url_epInfo'] = u"%(api_url)s/series/%%s/episodes" % self.config

        self.config['url_seriesInfo'] = u"%(api_url)s/series/%%s" % self.config
        self.config[
            'url_actorsInfo'] = u"%(api_url)s/series/%%s/actors" % self.config

        self.config[
            'url_seriesBanner'] = u"%(api_url)s/series/%%s/images" % self.config
        self.config['url_seriesBannerInfo'] = (
            u"%(api_url)s/series/%%s/images/query?keyType=%%s" % self.config)
        self.config[
            'url_artworkPrefix'] = u"%(base_url)s/banners/%%s" % self.config

        self.__authorized = False
        self.headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Accept-Language': self.config['language'],
        }
Exemplo n.º 5
0
# Global matplotlib defaults: bold, large text and thick lines.
plt.rcParams['figure.titlesize'] = 'large'
plt.rcParams['figure.titleweight'] = 'bold'
# Both axes share the same tick label size.
plt.rcParams['xtick.labelsize'] = plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['axes.titlesize'] = 'large'
plt.rcParams['font.size'] = 20
plt.rcParams['font.weight'] = 'bold'
plt.rcParams['axes.labelweight'] = "bold"
plt.rcParams['axes.titlepad'] = 10
plt.rcParams['axes.titleweight'] = "bold"
plt.rcParams['legend.fontsize'] = 14
plt.rcParams["lines.linewidth"] = 5.0
# NOTE(review): the style sheet is applied after the manual rcParams tweaks;
# confirm it does not reset any of the values set above.
plt.style.use('dark_background')

# HTTP session with responses cached in a local SQLite file for 12 hours.
expire_after = datetime.timedelta(hours=12)
session = requests_cache.CachedSession(cache_name='cache',
                                       backend='sqlite',
                                       expire_after=expire_after)

# Default date range for the readers below. `end` is computed once, when the
# module is loaded.
start = datetime.datetime(1990, 6, 1)
end = datetime.date.today()


def fred_reader(symbol, start=start):
    """Read ``symbol`` from the 'fred' data source via the shared session.

    ``start`` defaults to the module-level ``start`` as it was when this
    function was defined; the range always ends at the module-level ``end``.
    Returns whatever ``DataReader`` returns.
    """
    read = pd_reader.data.DataReader
    return read(symbol, 'fred', start, end, session=session)


def fred_reader_series(symbol, start=start):
Exemplo n.º 6
0
 def __init__(self, throttle_secs=1.0):
     """Set up the cached session and the throttling bookkeeping.

     throttle_secs is stored unchanged on the instance (presumably the
     minimum spacing between fetches - confirm against the fetch method);
     the last-fetch marker starts at 0.0.
     """
     self._throttle_secs, self._last_fetch = throttle_secs, 0.0
     self._session = requests_cache.CachedSession('.url_fetcher_cache')
Exemplo n.º 7
0
import json
import requests
import requests_cache

from omni.interfaces.invoke import invoke

# Root of the API; endpoint paths are appended to it by the functions below.
BASE_URL = 'https://api.coinmarketcap.com/v1/'

# Shared session: responses are cached in a SQLite store named
# "coinmarketcap" with an expiry of 30 (seconds, per requests_cache's
# handling of numeric expire_after values).
coinmarketcap_session = requests_cache.CachedSession(cache_name="coinmarketcap", expire_after=30, backend='sqlite')

def get_all_tickers(input):
    """GET ``BASE_URL + 'ticker/'`` through ``invoke`` using the cached session.

    ``input.convert`` is sent as the ``convert`` query parameter. Returns
    whatever ``invoke`` returns.
    """
    # todo convert integer repr [0.1] into limit (no 'limit' parameter is
    # sent yet)
    query = {'convert': input.convert}
    endpoint = BASE_URL + 'ticker/'
    return invoke("GET", url=endpoint, params=query, session=coinmarketcap_session)

def get_stats(input):
    """GET ``BASE_URL + 'global/'`` through ``invoke`` using the cached session.

    ``input.convert`` is sent as the ``convert`` query parameter. Returns
    whatever ``invoke`` returns.
    """
    query = {'convert': input.convert}
    endpoint = BASE_URL + 'global/'
    return invoke("GET", url=endpoint, params=query, session=coinmarketcap_session)
Exemplo n.º 8
0
from urllib3.util.retry import Retry
import datetime

# Set up the requests cache: a SQLite store under ~/.patent_client whose
# entries expire after three days. Both GET and POST responses are cached.
cache_max_age = datetime.timedelta(days=3)
cache_dir = "~/.patent_client"
CACHE_BASE = Path(cache_dir).expanduser()
# exist_ok avoids an error when the directory is already there; parent
# directories are not created.
CACHE_BASE.mkdir(exist_ok=True)
CACHE_CONFIG = dict(
    expire_after=cache_max_age,
    backend=requests_cache.backends.sqlite.DbCache(
        location=str(CACHE_BASE / "requests_cache")),
    allowable_methods=("GET", "POST"),
)

session = requests_cache.CachedSession(**CACHE_CONFIG)
# Purge on start-up, passing "now minus the maximum age" as the cutoff
# (presumably entries created before it are dropped - confirm against the
# requests_cache version in use).
session.cache.remove_old_entries(datetime.datetime.utcnow() - cache_max_age)
session.headers[
    "User-Agent"] = f"Python Patent Clientbot/{__version__} ([email protected])"

# Install a default retry policy on the session using urllib3: up to 5
# attempts with a 0.2 backoff factor, for both https:// and http:// URLs.
retry = Retry(total=5, backoff_factor=0.2)
session.mount('https://', HTTPAdapter(max_retries=retry))
session.mount('http://', HTTPAdapter(max_retries=retry))

# User settings live in ~/.iprc. On first use the file is seeded from the
# default_settings.json that sits next to this module.
SETTINGS_FILE = Path("~/.iprc").expanduser()
if not SETTINGS_FILE.exists():
    DEFAULT_SETTINGS = Path(__file__).parent / "default_settings.json"
    shutil.copy(str(DEFAULT_SETTINGS), SETTINGS_FILE)

# read_text() opens and closes the file itself, so no handle is left open
# (the previous json.load(open(...)) never closed it).
SETTINGS = json.loads(SETTINGS_FILE.read_text())
Exemplo n.º 9
0
    def __init__(self,
                 interactive=False,
                 select_first=False,
                 debug=False,
                 cache=True,
                 banners=False,
                 actors=False,
                 custom_ui=None,
                 language=None,
                 search_all_languages=False,
                 apikey=None,
                 username=None,
                 userkey=None,
                 forceConnect=False,
                 dvdorder=False,
                 sort_series=True):
        """interactive (True/False):
            When True, the built-in console UI is used to select the correct show.
            When False, the first search result is used.

        select_first (True/False):
            Automatically selects the first series search result (rather
            than showing the user a list of more than one series).
            Is overridden by interactive = False, or specifying a custom_ui

        debug (True/False) DEPRECATED:
             Replaced with proper use of logging module. To show debug messages:

                 >>> import logging
                 >>> logging.basicConfig(level = logging.DEBUG)

        cache (True/False/str/requests_cache.CachedSession):

            Retrieved URLs can be persisted to disc.

            True/False enable or disable default caching. Passing
            string specifies the directory where to store the
            "tvdb.sqlite3" cache file. Alternatively a custom
            requests.Session instance can be passed (e.g maybe a
            customised instance of `requests_cache.CachedSession`)

        banners (True/False):
            Retrieves the banners for a show. These are accessed
            via the _banners key of a Show(), for example:

            >>> Tvdb(banners=True)['scrubs']['_banners'].keys()
            [u'fanart', u'poster', u'seasonwide', u'season', u'series']

        actors (True/False):
            Retrieves a list of the actors for a show. These are accessed
            via the _actors key of a Show(), for example:

            >>> t = Tvdb(actors=True)
            >>> t['scrubs']['_actors'][0]['name']
            u'John C. McGinley'

        custom_ui (tvdb_ui.BaseUI subclass):
            A callable subclass of tvdb_ui.BaseUI (overrides interactive option)

        language (2 character language abbreviation):
            The language of the returned data. Is also the language search
            uses. Default is "en" (English). For full list, run..

            >>> Tvdb().config['valid_languages'] #doctest: +ELLIPSIS
            ['da', 'fi', 'nl', ...]

        search_all_languages (True/False):
            By default, Tvdb will only search in the language specified using
            the language option. When this is True, it will search for the
            show in any language

        apikey (str/unicode):
            Override the default thetvdb.com API key. By default it will use
            tvdb_api's own key (fine for small scripts), but you can use your
            own key if desired - this is recommended if you are embedding
            tvdb_api in a larger application)
            See http://thetvdb.com/?tab=apiregister to get your own key
            NOTE: the override only takes effect when apikey, username and
            userkey are all supplied; otherwise the built-in key is used.

        username (str/unicode):
            Override the default thetvdb.com username. By default it will use
            tvdb_api's own username (fine for small scripts), but you can use your
            own key if desired - this is recommended if you are embedding
            tvdb_api in a larger application)
            See http://thetvdb.com/ to register an account

        userkey (str/unicode):
            Override the default thetvdb.com userkey. By default it will use
            tvdb_api's own userkey (fine for small scripts), but you can use your
            own key if desired - this is recommended if you are embedding
            tvdb_api in a larger application)
            See http://thetvdb.com/ to register an account

        forceConnect (bool):
            If true it will always try to connect to theTVDB.com even if we
            recently timed out. By default it will wait one minute before
            trying again, and any requests within that one minute window will
            return an exception immediately.
        sort_series (bool):
            If true, sort the series list for best match to search term.
            If false, use the sort order returned by theTVDB.com
            The default is true.

        Raises tvdb_error when a timeout was recorded less than a minute ago
        (unless forceConnect is set), and ValueError for an unknown language
        or an unusable cache argument.
        """

        global lastTimeout

        # if we're given a lastTimeout that is less than 1 min just give up
        if not forceConnect and lastTimeout is not None and datetime.datetime.now(
        ) - lastTimeout < datetime.timedelta(minutes=1):
            raise tvdb_error(
                "We recently timed out, so giving up early this time")

        self.shows = ShowContainer()  # Holds all Show classes
        self.corrections = {}  # Holds show-name to show_id mapping

        self.config = {}

        # Custom credentials are used only when all three parts are given;
        # anything less falls back to the library's shared key.
        if apikey and username and userkey:
            self.config['auth_payload'] = {
                "apikey": apikey,
                "username": username,
                "userkey": userkey
            }
        else:
            self.config['auth_payload'] = {
                "apikey": "0629B785CE550C8D",
                "userkey": "",
                "username": ""
            }

        self.config['debug_enabled'] = debug  # show debugging messages

        self.config['custom_ui'] = custom_ui

        self.config['interactive'] = interactive  # prompt for correct series?

        self.config['select_first'] = select_first

        self.config['search_all_languages'] = search_all_languages

        self.config['dvdorder'] = dvdorder

        self.config['sort_series'] = sort_series

        if cache is True or isinstance(cache, str):
            # Default location and explicit directory differ only in where
            # the SQLite file goes; the session itself is built identically.
            if cache is True:
                cache_name = self._getTempDir()
            else:
                # Specified cache path
                cache_name = os.path.join(cache, "tvdb_api")
            self.session = requests_cache.CachedSession(
                expire_after=21600,  # 6 hours
                backend='sqlite',
                cache_name=cache_name,
                include_get_headers=True)
            self.session.remove_expired_responses()
            # Previously only set for cache=True; a path-based cache is
            # just as enabled.
            self.config['cache_enabled'] = True
        elif cache is False:
            self.session = requests.Session()
            self.config['cache_enabled'] = False
        else:
            self.session = cache
            try:
                self.session.get
            except AttributeError:
                raise ValueError(
                    "cache argument must be True/False, string as cache path or requests.Session-type object (e.g from requests_cache.CachedSession)"
                )
            # Best effort: a caller-supplied session counts as caching only
            # when it is a requests_cache session.
            self.config['cache_enabled'] = isinstance(
                cache, requests_cache.CachedSession)

        self.config['banners_enabled'] = banners
        self.config['actors_enabled'] = actors

        if self.config['debug_enabled']:
            warnings.warn(
                "The debug argument to tvdb_api.__init__ will be removed in the next version. "
                "To enable debug messages, use the following code before importing: "
                "import logging; logging.basicConfig(level=logging.DEBUG)")
            logging.basicConfig(level=logging.DEBUG)

        # List of language from http://thetvdb.com/api/0629B785CE550C8D/languages.xml
        # Hard-coded here as it is relatively static, and saves another HTTP request, as
        # recommended on http://thetvdb.com/wiki/index.php/API:languages.xml
        self.config['valid_languages'] = [
            "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
            "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv",
            "no"
        ]

        # thetvdb.com should be based around numeric language codes,
        # but to link to a series like http://thetvdb.com/?tab=series&id=79349&lid=16
        # requires the language ID, thus this mapping is required (mainly
        # for usage in tvdb_ui - internally tvdb_api will use the language abbreviations)
        self.config['langabbv_to_id'] = {
            'el': 20,
            'en': 7,
            'zh': 27,
            'it': 15,
            'cs': 28,
            'es': 16,
            'ru': 22,
            'nl': 13,
            'pt': 26,
            'no': 9,
            'tr': 21,
            'pl': 18,
            'fr': 17,
            'hr': 31,
            'de': 14,
            'da': 10,
            'fi': 11,
            'hu': 19,
            'ja': 25,
            'he': 24,
            'ko': 32,
            'sv': 8,
            'sl': 30
        }

        if language is None:
            self.config['language'] = 'en'
        elif language not in self.config['valid_languages']:
            raise ValueError("Invalid language %s, options are: %s" %
                             (language, self.config['valid_languages']))
        else:
            self.config['language'] = language

        # The following url_ configs are based on
        # http://thetvdb.com/wiki/index.php/Programmers_API
        # Each template is pre-filled with the base/api URL here; the doubled
        # %% leaves a %s placeholder for the caller to fill in later.
        self.config['base_url'] = "http://thetvdb.com"
        self.config['api_url'] = "https://api.thetvdb.com"

        self.config[
            'url_getSeries'] = u"%(api_url)s/search/series?name=%%s" % self.config
        self.config[
            'url_getSeriesById'] = u"%(api_url)s/search/series?id=%%s" % self.config

        self.config[
            'url_epInfo'] = u"%(api_url)s/series/%%s/episodes" % self.config
        self.config['url_epDetail'] = u"%(api_url)s/episodes/%%s" % self.config

        self.config['url_seriesInfo'] = u"%(api_url)s/series/%%s" % self.config
        self.config[
            'url_actorsInfo'] = u"%(api_url)s/series/%%s/actors" % self.config

        self.config[
            'url_seriesBanner'] = u"%(api_url)s/series/%%s/images" % self.config
        self.config[
            'url_seriesBannerInfo'] = u"%(api_url)s/series/%%s/images/query?keyType=%%s" % self.config
        self.config[
            'url_artworkPrefix'] = u"%(base_url)s/banners/%%s" % self.config

        self.__authorized = False
        self.headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Accept-Language': self.config['language'],
            'User-Agent': 'tvdb/2.0'
        }
Exemplo n.º 10
0
import os, sys, random
import yfinance as yf
import pandas as pd
from tqdm import tqdm
from businessdate import BusinessDate

# TODO: use this session for caching yfinance results.
# Passing a session to the Ticker method is tracked as an open issue:
# https://github.com/ranaroussi/yfinance/issues/677
import requests_cache
SESH = requests_cache.CachedSession('yfinance.cache')
SESH.headers['User-agent'] = 'my-program/1.0'

# Paths: make the parent of this file's directory importable so that the
# `toolbox` package can be resolved by the import below.
cwdir = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(1, os.path.join(cwdir, "../"))
from toolbox.scrape_utils import update_session_proxy


def valid_stock(stock_obj):
    '''
    Check if a stock_obj created by yf.Ticker has data.

    Returns True when ``stock_obj.info`` has a 'symbol' key. Any ordinary
    error raised while reading ``info`` (missing attribute, lookup failure,
    ...) is treated as "no data" and yields False.
    '''
    try:
        return 'symbol' in stock_obj.info.keys()
    except Exception:
        # Deliberately broad: reading ``info`` can fail in many ways. Unlike
        # a bare ``except:``, this lets KeyboardInterrupt and SystemExit
        # propagate.
        return False


def tickers_parser(tickers, return_list=False, max_items=None):
    '''
    parse a string of space separated tickers with special handling for HK tickers
Exemplo n.º 11
0
def get_session():
    """Return an HTTP session, cached when CACHE_REQUESTS is in the environment.

    Only the presence of the ``CACHE_REQUESTS`` variable matters, not its
    value. The backing library is imported lazily, so only the one that is
    actually used has to be installed.
    """
    if 'CACHE_REQUESTS' not in os.environ:
        import requests
        return requests.Session()
    import requests_cache
    return requests_cache.CachedSession()
Exemplo n.º 12
0
def validate_image(state, task_meta, **options):
    """
    Validate the image referenced by a node and queue it for storage.

    mod: handle data URIs and responses without a content-type header

    task_meta keys read here: 'node_id' or 'node_path' (to locate the node),
    'prop_name' (defaults to 'image') and 'required'. Supported options:
    'cache_backend' and 'cache_expire_after' (defaults to 300), which switch
    the download to a requests_cache session.

    Only PNG and SVG images are accepted. Remote images are fetched and
    turned into a base64 data URI; the returned task result then carries a
    store_original_resource action for it.

    Raises TaskPrerequisitesError when the node cannot be resolved, and
    TypeError when the image property is of an unsupported type.
    """
    allowed_types = ('image/png', 'image/svg+xml')

    try:
        node_id = task_meta.get('node_id')
        node_path = task_meta.get('node_path')
        prop_name = task_meta.get('prop_name', 'image')
        required = bool(task_meta.get('required', False))
        if node_id:
            node = get_node_by_id(state, node_id)
            node_path = [node_id]
        else:
            node = get_node_by_path(state, node_path)

        if options.get('cache_backend'):
            session = requests_cache.CachedSession(
                backend=options['cache_backend'],
                expire_after=options.get('cache_expire_after', 300))
        else:
            session = requests.Session()
    except (IndexError, TypeError, KeyError):
        raise TaskPrerequisitesError()

    actions = []

    image_val = node.get(prop_name)

    if image_val is None:
        # A missing image is only a failure when the property is required.
        return task_result(
            not required,
            "Could not load and validate image in node {}".format(
                abv_node(node_id, node_path)))
    if isinstance(image_val, six.string_types):
        url = image_val
    elif isinstance(image_val, dict):
        url = image_val.get('id')
    elif isinstance(image_val, list):
        return task_result(False, "many images not allowed")
    else:
        raise TypeError("Could not interpret image property value {}".format(
            abbreviate_value(image_val)))

    if url:
        existing_file = state.get('input', {}).get('original_json',
                                                   {}).get(url)
        if existing_file:
            return task_result(
                True, "Image resource already stored for url {}".format(
                    abbreviate_value(url)))
        else:
            try:
                m = IMAGE_DATA_URI.match(url)
                if m:
                    # Match.groups() returns a tuple, which cannot be tested
                    # against a string with `in`; the named group is what is
                    # wanted here.
                    # NOTE(review): assumes IMAGE_DATA_URI defines a
                    # 'mediatype' group - confirm against the pattern.
                    if m.group('mediatype') not in allowed_types:
                        return task_result(
                            True, "Invalid image at url {}".format(
                                abbreviate_value(url)))
                    data_uri = url
                else:
                    result = session.get(url)

                    content_type = result.headers.get('content-type', '')
                    if content_type:
                        # Compare the bare media type: drop parameters such
                        # as "; charset=..." and ignore case. A substring
                        # test would also accept fragments like "image/".
                        content_type = content_type.split(
                            ';', 1)[0].strip().lower()
                        if content_type not in allowed_types:
                            return task_result(
                                True, "Invalid image at url {}".format(
                                    abbreviate_value(url)))
                    else:
                        # No header: sniff the type from the payload itself.
                        image_type = check_image_type(
                            io.BytesIO(result.content))

                        if image_type == 'PNG':
                            content_type = 'image/png'
                        elif image_type == 'SVG':
                            content_type = 'image/svg+xml'
                        else:
                            return task_result(
                                True, "Invalid image at url {}".format(
                                    abbreviate_value(url)))

                    # b64encode returns bytes; decode so the URI does not end
                    # up containing a "b'...'" repr on Python 3.
                    encoded_body = base64.b64encode(
                        result.content).decode('ascii')
                    data_uri = "data:{};base64,{}".format(
                        content_type, encoded_body)
            except (requests.ConnectionError, KeyError):
                return task_result(False,
                                   "Could not fetch image at {}".format(url))
            else:
                actions.append(store_original_resource(url, data_uri))

    return task_result(
        True,
        "Validated image for node {}".format(abv_node(node_id,
                                                      node_path)), actions)
Exemplo n.º 13
0
import os

import requests
import requests_cache
import urllib3
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import tempfile

logger = logging.getLogger(__name__)

# The cache is a SQLite store named 'anime-cache' inside the system
# temporary directory.
cachefile = os.path.join(tempfile.gettempdir(), 'anime-cache')
# Alternative kept for reference (global cache install instead of a
# dedicated session):
# requests_cache.install_cache(cachefile, backend='sqlite', expire_after=3600)

# Dedicated cached session; expire_after=3600 (one hour when read as seconds).
_session = requests_cache.CachedSession(cachefile,
                                        backend='sqlite',
                                        expire_after=3600)


def cacheinfo_hook(response, *args, **kwargs):
    """Response hook: log at debug level whether the response was cached.

    A response without a ``from_cache`` attribute counts as uncached. Extra
    positional and keyword arguments are accepted and ignored. The response
    is returned unchanged.
    """
    served_from_cache = getattr(response, 'from_cache', False)
    logger.debug('cached request' if served_from_cache else 'uncached request')
    return response


# Replace the session's hooks mapping so every response goes through
# cacheinfo_hook.
_session.hooks = {'response': cacheinfo_hook}

# Uncached alternative kept for reference:
# _session = requests.Session()
# Silence urllib3's InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
Exemplo n.º 14
0
import requests_cache
USER_AGENT = "OSM v0.0.X"
# Shared in-memory cached session; expire_after is 5 * 60 (five minutes when
# read as seconds).
SESSION = requests_cache.CachedSession(expire_after=5 * 60, backend="memory")
# NOTE(review): this assignment replaces the session's whole header mapping
# with a plain dict holding only the User-Agent - confirm that dropping the
# library's default headers is intended.
SESSION.headers = {"User-Agent": USER_AGENT}


def set_github_token(token):
    """Set ``Authorization: token <token>`` on the shared SESSION headers."""
    SESSION.headers.update({"Authorization": f"token {token}"})


def clear_github_token():
    """Drop the ``Authorization`` header from the shared SESSION, if present."""
    SESSION.headers.pop("Authorization", None)
Exemplo n.º 15
0
            LOGGER.debug('avg delay: %s, sleeping %s s, flag: %s', timeout,
                         timeout_, bool(quo))

            # will not sleep (timeout_ * bool(quo)=0) for the first exempt calls
            sleep(timeout_ * bool(quo))

        return response

    return hook


URL = "https://fanyi.baidu.com/translate"
# Uncached alternative kept for reference:
# SESS = requests.Session()
# Cached session; both GET and POST responses are eligible for caching.
SESS = requests_cache.CachedSession(
    cache_name=CACHE_NAME,
    expire_after=EXPIRE_AFTER,
    allowable_methods=('GET', 'POST'),
)
# Replace the session's hooks mapping with a single throttling response hook.
SESS.hooks = {
    'response': make_throttle_hook(1, 200)
}  # to play safe, default: 0.67, 1000
# One request is issued at import time (presumably to prime cookies for
# later calls - confirm).
SESS.get(URL, headers=HEADERS)

# for js_sign below
# NOTE(review): exec() runs the Python generated from the JS source; this is
# only safe as long as JS is a trusted, bundled constant.
exec(js2py.translate_js(JS))  # pylint: disable=exec-used


def _js_sign(text, gtk='320305.131321201'):
    '''gtk, does not play  role

    >>> assert _js_sign('test') == '431039.159886'
Exemplo n.º 16
0
 def __init__(self):
     """Create the SQLite-backed cached session stored on ``self.session``.

     Cache entries expire after ``seconds_to_cache``.
     """
     cache_options = {
         'cache_name': 'cache',
         'backend': 'sqlite',
         'expire_after': seconds_to_cache,
     }
     self.session = requests_cache.CachedSession(**cache_options)
Exemplo n.º 17
0
from urllib.parse import quote_plus
import requests_cache, imghdr
from validators import validate_raw_files
from create_csvs import create_csvs

from ers import all_keywords_de as keywords, mh_brands, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer, fpath_namer
import shutil
import requests
from helpers.random_user_agent import randomua

# Init variables and assets
shop_id = 'galeria'
root_url = 'https://www.galeria-kaufhof.de'
# Cached session whose cache name is derived from the shop id by fpath_namer.
session = requests_cache.CachedSession(fpath_namer(shop_id, 'requests_cache'))
# The whole header mapping is replaced, so only a User-Agent from randomua()
# is set on the session.
session.headers = {'User-Agent': randomua()}
country = 'DE'
# Result containers, all starting out empty.
searches, categories, products = {}, {}, {}

from parse import parse


def getprice(pricestr):
    """Convert a price label such as ``'ab 12 €'`` into an integer amount.

    A leading ``'ab '`` prefix is dropped, then the rest must match the
    pattern ``'{pound:d} €'``. Returns the matched integer multiplied by
    100, or None when the label is None, empty, or does not match.
    """
    # Check for None/empty first: calling .startswith on None would raise.
    if not pricestr:
        return None
    if pricestr.startswith('ab '):
        pricestr = pricestr[3:]
    if not pricestr:
        return None
    price = parse('{pound:d} €', pricestr)
    if price:
        return price.named['pound'] * 100
    return None
Exemplo n.º 18
0
if __name__ == '__main__':
    # Command-line options. The defaults are given as strings; argparse runs
    # string defaults through `type`, so both end up as ints.
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--count',
                        default='3',
                        type=int,
                        help='Number of searches to run')
    parser.add_argument('-H',
                        '--hours',
                        default='72',
                        type=int,
                        help='Number hours for cache expiration')
    args = parser.parse_args()

    # Responses are cached in a local SQLite file for --hours hours.
    session = requests_cache.CachedSession(
        cache_name='cache',
        backend='sqlite',
        expire_after=datetime.timedelta(hours=args.hours))

    out = []

    # Run the search --count times and pool the results of every run.
    for i in range(0, args.count):
        print('Starting run {0} of {1}'.format(i + 1, args.count))
        out += get_list_from_github(session)

    # An empty result is reported and ends the script with exit status 1.
    if not out:
        print('Found no data packages, exiting')
        sys.exit(1)

    # Sort and deduplicate.
    out = sorted(set(out))
    print('Found %s data packages' % len(out))