import zlib
import base64
import pika
import numpy as np
import sklearn.utils as sku
import pandas_datareader as pdr
from sklearn.model_selection import TimeSeriesSplit
from threading import Thread, Lock

# NOTE(review): `logging`, `requests_cache`, `datetime` and `pd` are used
# below but are not imported in this visible chunk — confirm they are
# imported elsewhere in the file.
FORMAT = '%(asctime)s -%(threadName)11s-\t[%(levelname)s]:%(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger()
logger.info("Start")

# Cached HTTP session: downloads are persisted to sqlite for 5 days so
# re-runs do not re-hit the remote services.
session = requests_cache.CachedSession(cache_name='cache',
                                       backend='sqlite',
                                       expire_after=datetime.timedelta(days=5))

# Scrape the S&P 500 constituent table from Wikipedia (first table on page).
symbols_table = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies", header=0)[0]
# Only the tail of the symbol list (index 400 onward) is processed here.
symbols = list(symbols_table.loc[:, "Ticker symbol"])[400:]
logger.info("Symbols ready")

unprocessed_symbols = []  # symbols whose download fails (filled elsewhere)
data_raw = {}  # symbol -> raw reader result

# Download 2010-2015 data for each symbol from Quandl through the cache.
# NOTE(review): this snippet is truncated mid-call in this view.
for symbols_index, symbol in enumerate(symbols):
    try:
        data_raw[symbol] = pdr.quandl.QuandlReader(
            symbol,
            start=datetime.datetime(2010, 1, 1),
            end=datetime.datetime(2015, 1, 1),
            session=session,
# Kodi add-on bootstrap: plugin identity, cached HTTP session and endpoints.
addon = xbmcaddon.Addon()
plugin = Plugin()
plugin.name = addon.getAddonInfo('name')

# Fixed user agent string (mimics an Amazon Fire TV device).
user_agent = 'Dalvik/2.1.0 (Linux; U; Android 5.1.1; AFTS Build/LVY48F)'

# Per-addon profile directory supplied by Kodi.
USER_DATA_DIR = xbmc.translatePath(
    addon.getAddonInfo('profile')).decode('utf-8')
CACHE_TIME = int(addon.getSetting('cache_time'))  # hours, user-configurable
CACHE_FILE = os.path.join(USER_DATA_DIR, 'cache')
expire_after = timedelta(hours=CACHE_TIME)
if not os.path.exists(USER_DATA_DIR):
    os.makedirs(USER_DATA_DIR)

# Cached session; old_data_on_error=True serves stale cache entries when
# the origin server fails.
s = requests_cache.CachedSession(CACHE_FILE,
                                 allowable_methods='GET',
                                 expire_after=expire_after,
                                 old_data_on_error=True)
s.headers.update({'User-Agent': user_agent})

# Service endpoints; thumb_url embeds a Kodi-style |User-Agent= suffix.
data_url = 'http://swiftstreamz.com/SwiftLive/swiftlive.php'
api_url = 'http://swiftstreamz.com/SwiftLive/api.php'
list_url = 'http://swiftstreamz.com/SwiftLive/api.php?cat_id={0}'
thumb_url = 'http://swiftstreamz.com/SwiftLive/images/thumbs/{0}|User-Agent={1}'

USER = '******'
PASS = '******'
Email = plugintools.get_setting("Email")
Password = plugintools.get_setting("Password")
# NOTE(review): snippet is truncated here in this view.
if Email == "":
def clear_cache():
    """Setup and clear the cdragon api cache before and after every test."""
    cache_session = requests_cache.CachedSession('cdragon_cache',
                                                 expire_after=0)

    def _purge():
        # expire_after=0 marks every entry expired, so this wipes the cache
        cache_session.remove_expired_responses(expire_after=0)

    _purge()   # clean slate before the test body runs
    yield
    _purge()   # and again once the test has finished
def __init__(
    self,
    interactive=False,
    select_first=False,
    cache=True,
    banners=False,
    actors=False,
    custom_ui=None,
    language=None,
    search_all_languages=False,
    apikey=None,
    username=None,
    userkey=None,
    forceConnect=None,  # noqa
    dvdorder=False,
):
    """Create a Tvdb API interface.

    Args:
        interactive (bool): When True, the built-in console UI is used to
            select the correct show; when False, the first search result
            is used.
        select_first (bool): Automatically select the first series search
            result instead of listing multiple matches. Overridden by
            interactive=False or a custom_ui.
        cache (bool/str/requests.Session): True/False enables/disables the
            default sqlite-backed cache; a string names the directory for
            the cache file; a Session-like object (e.g. a customised
            requests_cache.CachedSession) is used as-is.
        banners (bool): Retrieve show banners (exposed via the _banners key).
        actors (bool): Retrieve the actor list (exposed via the _actors key).
        custom_ui (tvdb_ui.BaseUI subclass): Callable UI override; takes
            precedence over `interactive`.
        language (str): Two-letter language code used for returned data and
            searching. Defaults to "en".
        search_all_languages (bool): Search in every language rather than
            only the configured one.
        apikey (str): Required TheTVDB API key (register at
            https://thetvdb.com/api-information). Falls back to the
            TVDB_API_KEY environment variable.
        username (str or None): Account for authenticated actions.
        userkey (str or None): User authentication key for `username`.
        forceConnect: DEPRECATED; has no function and only emits a warning.
        dvdorder (bool): Use DVD episode ordering.

    Raises:
        ValueError: If no apikey is supplied (directly or via TVDB_API_KEY),
            or if `cache` is not a supported type.
    """
    if forceConnect is not None:
        warnings.warn(
            "forceConnect argument is deprecated and will be removed soon",
            category=DeprecationWarning,
        )

    self.shows = ShowContainer()  # Holds all Show classes
    self.corrections = {}  # Holds show-name to show_id mapping
    self.config = {}

    # Ability to pull key from env-var, mostly for unit-tests
    _test_key = os.getenv('TVDB_API_KEY')
    if apikey is None and _test_key is not None:
        apikey = _test_key

    if apikey is None:
        raise ValueError((
            "apikey argument is now required - an API key can be easily registered "
            "at https://thetvdb.com/api-information"))

    self.config['auth_payload'] = {
        "apikey": apikey,
        "username": username or "",
        "userkey": userkey or "",
    }

    self.config['custom_ui'] = custom_ui
    self.config['interactive'] = interactive  # prompt for correct series?
    self.config['select_first'] = select_first
    self.config['search_all_languages'] = search_all_languages
    self.config['dvdorder'] = dvdorder

    if cache is True:
        # Default on-disk cache in a temp directory.
        cache_dir = self._getTempDir()
        LOG.debug("Caching using requests_cache to %s" % cache_dir)
        self.session = requests_cache.CachedSession(
            expire_after=21600,  # 6 hours
            backend='sqlite',
            cache_name=cache_dir,
            include_get_headers=True,
        )
        # Custom cache-key function (module-level `create_key`) bound onto
        # the cache object so language headers participate in the key.
        self.session.cache.create_key = types.MethodType(
            create_key, self.session.cache)
        self.session.remove_expired_responses()
        self.config['cache_enabled'] = True
    elif cache is False:
        LOG.debug("Caching disabled")
        self.session = requests.Session()
        self.config['cache_enabled'] = False
    elif isinstance(cache, str):
        LOG.debug(
            "Caching using requests_cache to specified directory %s" % cache)
        # Specified cache path
        self.session = requests_cache.CachedSession(
            expire_after=21600,  # 6 hours
            backend='sqlite',
            cache_name=os.path.join(cache, "tvdb_api"),
            include_get_headers=True,
        )
        self.session.cache.create_key = types.MethodType(
            create_key, self.session.cache)
        self.session.remove_expired_responses()
    else:
        LOG.debug("Using specified requests.Session")
        self.session = cache
        # Duck-type check: anything with a .get attribute is accepted.
        try:
            self.session.get
        except AttributeError:
            raise ValueError((
                "cache argument must be True/False, string as cache path "
                "or requests.Session-type object (e.g from requests_cache.CachedSession)"
            ))

    self.config['banners_enabled'] = banners
    self.config['actors_enabled'] = actors

    if language is None:
        self.config['language'] = 'en'
    else:
        self.config['language'] = language

    # The following url_ configs are based off
    # https://api.thetvdb.com/swagger
    self.config['base_url'] = "http://thetvdb.com"
    self.config['api_url'] = "https://api.thetvdb.com"
    self.config[
        'url_getSeries'] = u"%(api_url)s/search/series?name=%%s" % self.config
    self.config[
        'url_epInfo'] = u"%(api_url)s/series/%%s/episodes" % self.config
    self.config['url_seriesInfo'] = u"%(api_url)s/series/%%s" % self.config
    self.config[
        'url_actorsInfo'] = u"%(api_url)s/series/%%s/actors" % self.config
    self.config[
        'url_seriesBanner'] = u"%(api_url)s/series/%%s/images" % self.config
    self.config['url_seriesBannerInfo'] = (
        u"%(api_url)s/series/%%s/images/query?keyType=%%s" % self.config)
    self.config[
        'url_artworkPrefix'] = u"%(base_url)s/banners/%%s" % self.config

    self.__authorized = False  # JWT auth happens lazily on first request
    self.headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Accept-Language': self.config['language'],
    }
# Global matplotlib styling for all plots produced by this module.
plt.rcParams['figure.titlesize'] = 'large'
plt.rcParams['figure.titleweight'] = 'bold'
plt.rcParams['xtick.labelsize'] = plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['axes.titlesize'] = 'large'
plt.rcParams['font.size'] = 20
plt.rcParams['font.weight'] = 'bold'
plt.rcParams['axes.labelweight'] = "bold"
plt.rcParams['axes.titlepad'] = 10
plt.rcParams['axes.titleweight'] = "bold"
plt.rcParams['legend.fontsize'] = 14
plt.rcParams["lines.linewidth"] = 5.0
plt.style.use('dark_background')

# HTTP cache for FRED requests: entries live for 12 hours.
expire_after = datetime.timedelta(hours=12)
session = requests_cache.CachedSession(cache_name='cache', backend='sqlite',
                                       expire_after=expire_after)

# Default query window: June 1990 through today.
start = datetime.datetime(1990, 6, 1)
end = datetime.date.today()


def fred_reader(symbol, start=start):
    # Fetch `symbol` from FRED between `start` and the module-level `end`,
    # going through the cached session.
    return pd_reader.data.DataReader(symbol, 'fred', start, end,
                                     session=session)


# NOTE(review): snippet is truncated here in this view (body not visible).
def fred_reader_series(symbol, start=start):
def __init__(self, throttle_secs=1.0):
    """Initialise a throttled fetcher backed by an on-disk response cache.

    throttle_secs: minimum spacing, in seconds, between live requests.
    """
    # No fetch has happened yet; epoch timestamp 0.0 means "never".
    self._last_fetch = 0.0
    self._throttle_secs = throttle_secs
    # All responses are cached on disk, so repeated fetches are free.
    self._session = requests_cache.CachedSession('.url_fetcher_cache')
import json
import requests
import requests_cache

from omni.interfaces.invoke import invoke

BASE_URL = 'https://api.coinmarketcap.com/v1/'

# Responses are cached on disk for 30 seconds to avoid hammering the API.
coinmarketcap_session = requests_cache.CachedSession(
    cache_name="coinmarketcap", expire_after=30, backend='sqlite')


def get_all_tickers(input):
    """Return all tickers from CoinMarketCap, priced in `input.convert`."""
    # todo convert integer repr [0.1] into limit
    params = {'convert': input.convert}
    #params['limit'] = limit
    return invoke("GET", url=BASE_URL + 'ticker/', params=params,
                  session=coinmarketcap_session)


def get_stats(input):
    """Return global market stats, priced in `input.convert`."""
    params = {'convert': input.convert}
    return invoke("GET", url=BASE_URL + 'global/', params=params,
                  session=coinmarketcap_session)
from urllib3.util.retry import Retry
import datetime

# Set Up Requests Cache
cache_max_age = datetime.timedelta(days=3)
cache_dir = "~/.patent_client"
CACHE_BASE = Path(cache_dir).expanduser()
CACHE_BASE.mkdir(exist_ok=True)
CACHE_CONFIG = dict(
    expire_after=cache_max_age,
    backend=requests_cache.backends.sqlite.DbCache(
        location=str(CACHE_BASE / "requests_cache")),
    allowable_methods=("GET", "POST"),  # POST cached too (API search calls)
)
session = requests_cache.CachedSession(**CACHE_CONFIG)
# Drop entries older than the max age so the sqlite file doesn't grow unbounded.
session.cache.remove_old_entries(datetime.datetime.utcnow() - cache_max_age)
session.headers[
    "User-Agent"] = f"Python Patent Clientbot/{__version__} ([email protected])"

# Install a default retry on the session using urllib3
retry = Retry(total=5, backoff_factor=0.2)
session.mount('https://', HTTPAdapter(max_retries=retry))
session.mount('http://', HTTPAdapter(max_retries=retry))

# Seed user settings from the packaged defaults on first run.
SETTINGS_FILE = Path("~/.iprc").expanduser()
if not SETTINGS_FILE.exists():
    DEFAULT_SETTINGS = Path(__file__).parent / "default_settings.json"
    shutil.copy(str(DEFAULT_SETTINGS), SETTINGS_FILE)
# Fix: read settings through a context manager so the file handle is
# closed (the original `json.load(open(...))` leaked the handle).
with SETTINGS_FILE.open() as _settings_fp:
    SETTINGS = json.load(_settings_fp)
def __init__(self,
             interactive=False,
             select_first=False,
             debug=False,
             cache=True,
             banners=False,
             actors=False,
             custom_ui=None,
             language=None,
             search_all_languages=False,
             apikey=None,
             username=None,
             userkey=None,
             forceConnect=False,
             dvdorder=False,
             sort_series=True):
    """Create a Tvdb interface (legacy version with bundled credentials).

    Args:
        interactive (bool): When True, the built-in console UI is used to
            select the correct show; when False, the first search result
            is used.
        select_first (bool): Automatically select the first series search
            result instead of listing multiple matches. Overridden by
            interactive=False or a custom_ui.
        debug (bool): DEPRECATED — use the logging module instead. When
            True, a warning is emitted and DEBUG logging is enabled.
        cache (bool/str/requests.Session): True/False enables/disables the
            default sqlite-backed cache; a string names the directory for
            the cache file; a Session-like object (e.g. a customised
            requests_cache.CachedSession) is used as-is.
        banners (bool): Retrieve show banners (exposed via the _banners key).
        actors (bool): Retrieve the actor list (exposed via the _actors key).
        custom_ui (tvdb_ui.BaseUI subclass): Callable UI override; takes
            precedence over `interactive`.
        language (str): Two-letter language code; must be listed in
            config['valid_languages']. Defaults to "en".
        search_all_languages (bool): Search in every language rather than
            only the configured one.
        apikey/username/userkey (str): Override the bundled thetvdb.com
            credentials; all three must be supplied together, otherwise
            the built-in defaults are used.
        forceConnect (bool): Try to connect even if a request timed out
            within the last minute (otherwise tvdb_error is raised
            immediately during that window).
        dvdorder (bool): Use DVD episode ordering.
        sort_series (bool): Sort series results for best match (True) or
            keep theTVDB.com order (False).

    Raises:
        tvdb_error: If a timeout occurred less than a minute ago and
            forceConnect is False.
        ValueError: For an invalid language or an unsupported cache type.
    """
    global lastTimeout

    # if we're given a lastTimeout that is less than 1 min just give up
    if not forceConnect and lastTimeout is not None and datetime.datetime.now(
    ) - lastTimeout < datetime.timedelta(minutes=1):
        raise tvdb_error(
            "We recently timed out, so giving up early this time")

    self.shows = ShowContainer()  # Holds all Show classes
    self.corrections = {}  # Holds show-name to show_id mapping
    self.config = {}

    # Use caller credentials only when the full triple is provided;
    # otherwise fall back to the bundled application key.
    if apikey and username and userkey:
        self.config['auth_payload'] = {
            "apikey": apikey,
            "username": username,
            "userkey": userkey
        }
    else:
        self.config['auth_payload'] = {
            "apikey": "0629B785CE550C8D",
            "userkey": "",
            "username": ""
        }

    self.config['debug_enabled'] = debug  # show debugging messages
    self.config['custom_ui'] = custom_ui
    self.config['interactive'] = interactive  # prompt for correct series?
    self.config['select_first'] = select_first
    self.config['search_all_languages'] = search_all_languages
    self.config['dvdorder'] = dvdorder
    self.config['sort_series'] = sort_series

    if cache is True:
        # Default on-disk cache in a temp directory.
        self.session = requests_cache.CachedSession(
            expire_after=21600,  # 6 hours
            backend='sqlite',
            cache_name=self._getTempDir(),
            include_get_headers=True)
        self.session.remove_expired_responses()
        self.config['cache_enabled'] = True
    elif cache is False:
        self.session = requests.Session()
        self.config['cache_enabled'] = False
    elif isinstance(cache, str):
        # Specified cache path
        self.session = requests_cache.CachedSession(
            expire_after=21600,  # 6 hours
            backend='sqlite',
            cache_name=os.path.join(cache, "tvdb_api"),
            include_get_headers=True)
        self.session.remove_expired_responses()
    else:
        self.session = cache
        # Duck-type check: anything with a .get attribute is accepted.
        try:
            self.session.get
        except AttributeError:
            raise ValueError(
                "cache argument must be True/False, string as cache path or requests.Session-type object (e.g from requests_cache.CachedSession)"
            )

    self.config['banners_enabled'] = banners
    self.config['actors_enabled'] = actors

    if self.config['debug_enabled']:
        warnings.warn(
            "The debug argument to tvdb_api.__init__ will be removed in the next version. "
            "To enable debug messages, use the following code before importing: "
            "import logging; logging.basicConfig(level=logging.DEBUG)")
        logging.basicConfig(level=logging.DEBUG)

    # List of languages from http://thetvdb.com/api/0629B785CE550C8D/languages.xml
    # Hard-coded here as it is relatively static, and saves another HTTP request, as
    # recommended on http://thetvdb.com/wiki/index.php/API:languages.xml
    self.config['valid_languages'] = [
        "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
        "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv",
        "no"
    ]

    # thetvdb.com should be based around numeric language codes,
    # but to link to a series like http://thetvdb.com/?tab=series&id=79349&lid=16
    # requires the language ID, thus this mapping is required (mainly
    # for usage in tvdb_ui - internally tvdb_api will use the language abbreviations)
    self.config['langabbv_to_id'] = {
        'el': 20, 'en': 7, 'zh': 27, 'it': 15, 'cs': 28, 'es': 16,
        'ru': 22, 'nl': 13, 'pt': 26, 'no': 9, 'tr': 21, 'pl': 18,
        'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11, 'hu': 19,
        'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30
    }

    if language is None:
        self.config['language'] = 'en'
    else:
        if language not in self.config['valid_languages']:
            raise ValueError("Invalid language %s, options are: %s" %
                             (language, self.config['valid_languages']))
        else:
            self.config['language'] = language

    # The following url_ configs are based off
    # http://thetvdb.com/wiki/index.php/Programmers_API
    self.config['base_url'] = "http://thetvdb.com"
    self.config['api_url'] = "https://api.thetvdb.com"
    self.config[
        'url_getSeries'] = u"%(api_url)s/search/series?name=%%s" % self.config
    self.config[
        'url_getSeriesById'] = u"%(api_url)s/search/series?id=%%s" % self.config
    self.config[
        'url_epInfo'] = u"%(api_url)s/series/%%s/episodes" % self.config
    self.config['url_epDetail'] = u"%(api_url)s/episodes/%%s" % self.config
    self.config['url_seriesInfo'] = u"%(api_url)s/series/%%s" % self.config
    self.config[
        'url_actorsInfo'] = u"%(api_url)s/series/%%s/actors" % self.config
    self.config[
        'url_seriesBanner'] = u"%(api_url)s/series/%%s/images" % self.config
    self.config[
        'url_seriesBannerInfo'] = u"%(api_url)s/series/%%s/images/query?keyType=%%s" % self.config
    self.config[
        'url_artworkPrefix'] = u"%(base_url)s/banners/%%s" % self.config

    self.__authorized = False  # JWT auth happens lazily on first request
    self.headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Accept-Language': self.config['language'],
        'User-Agent': 'tvdb/2.0'
    }
import os, sys, random import yfinance as yf import pandas as pd from tqdm import tqdm from businessdate import BusinessDate # TODO: for caching yfinance results # this is an open issue for the Ticker method: https://github.com/ranaroussi/yfinance/issues/677 import requests_cache SESH = requests_cache.CachedSession('yfinance.cache') SESH.headers['User-agent'] = 'my-program/1.0' #Paths cwdir = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(1, os.path.join(cwdir, "../")) from toolbox.scrape_utils import update_session_proxy def valid_stock(stock_obj): ''' Check if a stock_obj created by yf.Ticker has data ''' try: return 'symbol' in stock_obj.info.keys() except: return False def tickers_parser(tickers, return_list=False, max_items=None): ''' parse a string of space separated tickers with special handling for HK tickers
def get_session():
    """Return an HTTP session; cached when CACHE_REQUESTS is set in the env.

    Imports are deferred so the caching dependency is only loaded when the
    feature is actually enabled.
    """
    use_cache = 'CACHE_REQUESTS' in os.environ
    if use_cache:
        import requests_cache
        return requests_cache.CachedSession()
    import requests
    return requests.Session()
def validate_image(state, task_meta, **options):
    """Validate and store the image referenced by a node property.

    mod: handle data URIs and responses without a content-type header

    Looks up the target node from `task_meta` (by node_id or node_path),
    resolves its image property to a URL or data URI, optionally fetches
    it over HTTP (cached if options['cache_backend'] is given), and queues
    a store_original_resource action for the fetched data.

    Returns a task_result tuple; raises TaskPrerequisitesError when the
    task metadata is malformed.
    """
    try:
        node_id = task_meta.get('node_id')
        node_path = task_meta.get('node_path')
        prop_name = task_meta.get('prop_name', 'image')
        node_class = task_meta.get('node_class')  # NOTE(review): unused below
        required = bool(task_meta.get('required', False))
        if node_id:
            node = get_node_by_id(state, node_id)
            node_path = [node_id]
        else:
            node = get_node_by_path(state, node_path)
        # Optional HTTP caching; 300s default expiry when enabled.
        if options.get('cache_backend'):
            session = requests_cache.CachedSession(
                backend=options['cache_backend'],
                expire_after=options.get('cache_expire_after', 300))
        else:
            session = requests.Session()
    except (IndexError, TypeError, KeyError):
        # Malformed task metadata / missing node -> prerequisites failure.
        raise TaskPrerequisitesError()

    actions = []

    image_val = node.get(prop_name)
    if image_val is None:
        # Missing image passes only when the property is not required.
        return task_result(
            not required, "Could not load and validate image in node {}".format(
                abv_node(node_id, node_path)))
    # The property may be a bare URL string, an object with an 'id', or a
    # (disallowed) list of images.
    if isinstance(image_val, six.string_types):
        url = image_val
    elif isinstance(image_val, dict):
        url = image_val.get('id')
    elif isinstance(image_val, list):
        return task_result(False, "many images not allowed")
    else:
        raise TypeError("Could not interpret image property value {}".format(
            abbreviate_value(image_val)))

    if url:
        existing_file = state.get('input', {}).get('original_json', {}).get(url)
        if existing_file:
            # Already fetched earlier in this run; nothing to do.
            return task_result(
                True, "Image resource already stored for url {}".format(
                    abbreviate_value(url)))
        else:
            try:
                m = IMAGE_DATA_URI.match(url)
                if m:
                    # NOTE(review): m.groups('mediatype') returns a TUPLE
                    # (with 'mediatype' as the default for empty groups), so
                    # `tuple not in str` raises TypeError at runtime — this
                    # was probably meant to be m.group('mediatype'). Also,
                    # `x not in 'image/png, image/svg+xml'` is a substring
                    # test, not a membership test against two values.
                    if m.groups('mediatype') not in 'image/png, image/svg+xml':
                        # NOTE(review): returns success=True for an invalid
                        # image — confirm this is the intended semantics.
                        return task_result(
                            True, "Invalid image at url {}".format(
                                abbreviate_value(url)))
                    data_uri = url
                else:
                    result = session.get(url)
                    content_type = result.headers.get('content-type', '')
                    if content_type:
                        # Substring check as above — see NOTE(review).
                        if content_type not in 'image/png, image/svg+xml':
                            return task_result(
                                True, "Invalid image at url {}".format(
                                    abbreviate_value(url)))
                    else:
                        # No content-type header: sniff the payload bytes.
                        image_type = check_image_type(
                            io.BytesIO(result.content))
                        if image_type == 'PNG':
                            content_type = 'image/png'
                        elif image_type == 'SVG':
                            content_type = 'image/svg+xml'
                        else:
                            return task_result(
                                True, "Invalid image at url {}".format(
                                    abbreviate_value(url)))
                    # NOTE(review): b64encode returns bytes; on Python 3 the
                    # str.format below embeds "b'...'" into the data URI —
                    # likely needs .decode('ascii'). Verify target runtime
                    # (six usage suggests py2/py3 support was intended).
                    encoded_body = base64.b64encode(result.content)
                    data_uri = "data:{};base64,{}".format(
                        content_type, encoded_body)
            except (requests.ConnectionError, KeyError):
                return task_result(False,
                                   "Could not fetch image at {}".format(url))
            else:
                # Fetch succeeded: queue the storage side effect.
                actions.append(store_original_resource(url, data_uri))
    return task_result(
        True,
        "Validated image for node {}".format(abv_node(node_id, node_path)),
        actions)
# Fix: `logging.getLogger(__name__)` was used below without importing
# `logging` in this module's visible import list (NameError at import time).
import logging
import os
import tempfile

import requests
import requests_cache
import urllib3
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

logger = logging.getLogger(__name__)

# On-disk cache in the system temp dir; entries expire after one hour.
cachefile = os.path.join(tempfile.gettempdir(), 'anime-cache')
# requests_cache.install_cache(cachefile, backend='sqlite', expire_after=3600)
_session = requests_cache.CachedSession(cachefile, backend='sqlite',
                                        expire_after=3600)


def cacheinfo_hook(response, *args, **kwargs):
    """Response hook that logs whether a response was served from cache."""
    if not getattr(response, 'from_cache', False):
        logger.debug('uncached request')
    else:
        logger.debug('cached request')
    return response


_session.hooks = {'response': cacheinfo_hook}
# _session = requests.Session()

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import requests_cache

USER_AGENT = "OSM v0.0.X"

# In-memory response cache; entries expire after 5 minutes.
SESSION = requests_cache.CachedSession(expire_after=5 * 60, backend="memory")
SESSION.headers = {"User-Agent": USER_AGENT}


def set_github_token(token):
    """Attach a GitHub token to all subsequent SESSION requests."""
    # Fix: `global SESSION` removed — we only mutate the headers dict,
    # never rebind the module-level name.
    SESSION.headers["Authorization"] = f"token {token}"


def clear_github_token():
    """Remove any GitHub token from the session headers."""
    # Fix: pop with a default replaces the check-then-pop (LBYL) pattern
    # and is a no-op when no token is set.
    SESSION.headers.pop("Authorization", None)
LOGGER.debug('avg delay: %s, sleeping %s s, flag: %s', timeout, timeout_, bool(quo)) # will not sleep (timeout_ * bool(quo)=0) for the first exempt calls sleep(timeout_ * bool(quo)) return response return hook URL = "https://fanyi.baidu.com/translate" # SESS = requests.Session() SESS = requests_cache.CachedSession( cache_name=CACHE_NAME, expire_after=EXPIRE_AFTER, allowable_methods=('GET', 'POST'), ) SESS.hooks = { 'response': make_throttle_hook(1, 200) } # to play safe, default: 0.67, 1000 SESS.get(URL, headers=HEADERS) # for js_sign below exec(js2py.translate_js(JS)) # pylint: disable=exec-used def _js_sign(text, gtk='320305.131321201'): '''gtk, does not play role >>> assert _js_sign('test') == '431039.159886'
def __init__(self):
    """Initialise the client with a persistent on-disk response cache."""
    # sqlite-backed cache named 'cache'; entries live for the module-level
    # `seconds_to_cache` setting before being refetched.
    cached_session = requests_cache.CachedSession(
        cache_name='cache',
        backend='sqlite',
        expire_after=seconds_to_cache,
    )
    self.session = cached_session
from urllib.parse import quote_plus
import requests_cache, imghdr
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_de as keywords, mh_brands, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer, fpath_namer
import shutil
import requests
from helpers.random_user_agent import randomua

# Init variables and assets
shop_id = 'galeria'
root_url = 'https://www.galeria-kaufhof.de'
# Cached session; one sqlite file per shop (path built by fpath_namer).
session = requests_cache.CachedSession(fpath_namer(shop_id, 'requests_cache'))
session.headers = {'User-Agent': randomua()}
country = 'DE'
# Scrape accumulators, filled elsewhere in the module.
searches, categories, products = {}, {}, {}

from parse import parse


def getprice(pricestr):
    # Price strings may carry an "ab " ("from ...") prefix — strip it.
    if pricestr.startswith('ab '):
        pricestr = pricestr[3:]
    # Empty string -> None (no price available).
    if not pricestr:
        return
    # Parse e.g. "12 €" and convert whole euros to cents.
    # NOTE(review): the pattern '{pound:d} €' only matches integer euro
    # amounts; decimal prices like "12,99 €" fall through and return None —
    # confirm this is intended.
    price = parse('{pound:d} €', pricestr)
    if price:
        return price.named['pound'] * 100
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Fix: use real int defaults instead of the strings '3'/'72' — argparse
    # only applies `type` to command-line strings, so typed defaults should
    # already be the right type.
    parser.add_argument('-c', '--count', default=3, type=int,
                        help='Number of searches to run')
    parser.add_argument('-H', '--hours', default=72, type=int,
                        help='Number of hours for cache expiration')
    args = parser.parse_args()

    # Cache GitHub responses on disk so repeated runs stay within rate limits.
    session = requests_cache.CachedSession(
        cache_name='cache',
        backend='sqlite',
        expire_after=datetime.timedelta(hours=args.hours))

    out = []
    for i in range(args.count):  # fix: range(0, n) -> range(n)
        print('Starting run {0} of {1}'.format(i + 1, args.count))
        out += get_list_from_github(session)

    if not out:
        print('Found no data packages, exiting')
        sys.exit(1)

    # Sort and deduplicate.
    out = sorted(set(out))
    print('Found %s data packages' % len(out))