def download_histories_csv(date_str):
    """Fetch intraday histories and write them to one CSV file.

    The requests produced by ``_get_requests(date_str)`` are pushed through a
    ``BaseThrottler`` (``delay=0.04``). Once all of them have finished, the
    file ``data/intraday/us.intraday.polygon.history.csv`` is overwritten
    with a header line followed by one line per entry in each usable
    response's ``results`` list.

    A response is skipped when it is falsy, when its status code is not 200,
    when its JSON has no ``results`` key, or when ``results`` is empty.

    Args:
        date_str: Passed through unchanged to ``_get_requests``.
    """
    csv_path = 'data/intraday/us.intraday.polygon.history.csv'
    line_template = ('{date_str},{time_str},{close},{open},{high},{low},'
                     '{volume},{symbol}\n')

    pending = _get_requests(date_str)
    throttler = BaseThrottler(name='base-throttler', delay=0.04)
    throttler.start()
    submitted = throttler.multi_submit(pending)
    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')

    # Collect every response before the output file is opened (and truncated).
    responses = [item.response for item in submitted]

    with open(csv_path, 'w') as outfile:
        outfile.write('date,time,close,open,high,low,volume,symbol\n')
        for cnt, res in enumerate(responses):
            if not res:
                print('The response is invalid: %s' % res)
                continue
            if res.status_code != 200:
                continue

            payload = res.json()
            if 'results' not in payload:
                print('The response does not have results: %s' % payload)
                continue
            bars = payload['results']
            if not bars:
                continue

            symbol = payload['ticker']
            print('{cnt}th {symbol}, blobs: {l}'.format(
                cnt=cnt, symbol=symbol, l=len(bars)))

            # Build the full batch for this symbol first, then write it at once.
            batch = []
            for bar in bars:
                # blob['t'] is divided by 1000 before being used as an epoch
                # (presumably milliseconds -> seconds).
                stamp = datetime.datetime.fromtimestamp(
                    int(bar['t']) // 1000).astimezone(_TZ_US_EAST)
                batch.append(line_template.format(
                    date_str=stamp.strftime('%Y-%m-%d'),
                    time_str=stamp.strftime('%H:%M:%S'),
                    close=bar['c'],
                    open=bar['o'],
                    high=bar['h'],
                    low=bar['l'],
                    volume=bar['v'],
                    symbol=symbol))
            outfile.writelines(batch)
def _run_requests_return_rows(request_list):
    """Run snapshot requests through a throttler and format CSV rows.

    Every request is submitted to a ``BaseThrottler`` (``delay=0.5``). After
    all of them have finished, each usable response contributes one row per
    entry of its ``tickers`` list.

    A response is skipped when it is falsy, when its status code is not 200,
    when its JSON ``status`` is neither ``'OK'`` nor ``'success'``, or when it
    has no ``tickers`` key.

    Args:
        request_list: Requests handed to ``BaseThrottler.multi_submit``.

    Returns:
        A list of newline-terminated strings in the column order
        ``date,close,open,high,low,volume,symbol``.

    Raises:
        KeyError: If a ticker entry lacks ``ticker``, ``day``, one of the
            ``c/o/h/l/v`` fields, or a timestamp field.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.5)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            continue

        js = res.json()
        if 'status' not in js or js['status'] not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue
        if 'tickers' not in js:
            print('The response does not have results: %s' % (js))
            continue

        for ticker in js['tickers']:
            symbol = ticker['ticker']
            print('{cnt}th {symbol}'.format(cnt=cnt, symbol=symbol))
            daily = ticker['day']
            close, open_, high, low, volume = (
                daily['c'], daily['o'], daily['h'], daily['l'], daily['v'])

            # The old code looked the timestamp up under '1547787608999',
            # which is a sample timestamp value rather than a field name.
            # Read the 'updated' field instead; the old key is still accepted
            # so any payload that worked before keeps working.
            # NOTE(review): assumes 'updated' is epoch milliseconds, as the
            # division by 1000 implies -- confirm for the API version in use.
            if 'updated' in ticker:
                updated = ticker['updated']
            else:
                updated = ticker['1547787608999']
            epoch = int(updated) // 1000

            # Convert the instant into the Eastern zone. The previous
            # localize() call only relabelled the machine-local wall time,
            # giving a wrong date whenever the host is not on Eastern time.
            t = datetime.datetime.fromtimestamp(epoch, tz=_TZ_US_EAST)
            date_str = t.strftime('%Y-%m-%d')
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str,
                       close=close,
                       open=open_,
                       high=high,
                       low=low,
                       volume=volume,
                       symbol=symbol))
    return rows
def _run_requests_return_rows(request_list):
    """Run dataset requests through a throttler and format CSV rows.

    Every request is submitted to a ``BaseThrottler`` (``delay=0.1``). After
    all of them have finished, each usable response contributes one row per
    entry of ``js['dataset']['data']``.

    A response is skipped (with a printed message) when it is falsy, when its
    status code is not 200, when its JSON body is empty, or when the
    ``dataset`` / ``dataset.data`` keys are missing.

    Args:
        request_list: Requests handed to ``BaseThrottler.multi_submit``.

    Returns:
        A list of newline-terminated strings in the column order
        ``date,close,open,high,low,volume,symbol``.
    """
    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'

    throttler = BaseThrottler(name='base-throttler', delay=0.1)
    throttler.start()
    submitted = throttler.multi_submit(request_list)
    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')

    rows = []
    for response in [item.response for item in submitted]:
        if not response:
            print('The response is invalid: %s' % response)
            continue
        if response.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=response.status_code))
            continue

        payload = response.json()
        if not payload:
            print('The response is invalid: %s' % payload)
            continue
        if 'dataset' not in payload:
            print('The response does not have dataset: %s' % payload)
            continue
        dataset = payload['dataset']
        if 'data' not in dataset:
            print('The response data does not have data: %s' % payload)
            continue

        symbol = dataset['dataset_code']
        # Each record is read positionally: 0=date, 1=open, 2=high, 3=low,
        # 4=close, 5=volume.
        for record in dataset['data']:
            rows.append(row_template.format(
                date_str=record[0],
                close=record[4],
                open=record[1],
                high=record[2],
                low=record[3],
                volume=record[5],
                symbol=symbol))
    return rows
def main():
    """Send a batch of identical GET requests through a throttler.

    Reads ``delay``, ``n_reqs`` and ``url`` from ``parse_args()``, submits
    ``n_reqs`` requests using a session with a fixed user-agent header, then
    prints every response followed by the throttler's success and failure
    counters.
    """
    args = parse_args()

    session = requests.Session()
    session.headers.update({'user-agent': 'test-user-agent'})
    throttler = BaseThrottler(name='base-throttler',
                              delay=args['delay'],
                              session=session)

    # The request body is numbered from 1.
    batch = [
        requests.Request(method='GET',
                         url=args['url'],
                         data='Request - ' + str(index + 1))
        for index in range(0, args['n_reqs'])
    ]

    # The throttler is used as a context manager around the submission only.
    with throttler:
        submitted = throttler.multi_submit(batch)

    for item in submitted:
        print(item.response)

    summary = "Success: {s}, Failures: {f}".format(s=throttler.successes,
                                                   f=throttler.failures)
    print(summary)
def _run_requests_return_rows(request_list):
    """Run per-symbol history requests through a throttler and format rows.

    Every request is submitted to a ``BaseThrottler`` (``delay=0.04``). The
    symbol of each response is taken from the ``stock/<symbol>`` part of the
    URL of the request at the same position in ``request_list``.

    For each entry of a response: entries missing any of ``date``, ``close``,
    ``open``, ``high``, ``low`` or ``volume`` are reported and skipped. When
    the volume is zero or the close is ``None``, all four prices are replaced
    by the previous emitted close of the same response; if there is none yet,
    the entry is skipped.

    Args:
        request_list: Requests handed to ``BaseThrottler.multi_submit``; each
            must expose a ``url`` attribute.

    Returns:
        A list of newline-terminated strings in the column order
        ``date,close,open,high,low,volume,symbol``.
    """
    symbol_pattern = re.compile(r'stock/([^/]+)')
    required_keys = ('date', 'close', 'open', 'high', 'low', 'volume')
    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'

    throttler = BaseThrottler(name='base-throttler', delay=0.04)
    throttler.start()
    submitted = throttler.multi_submit(request_list)
    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')
    responses = [item.response for item in submitted]

    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % res)
            continue
        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        entries = res.json()
        match = symbol_pattern.search(request_list[cnt].url)
        if match is None:
            continue
        symbol = match.group(1)
        if not entries:
            continue
        print('{cnt}th {symbol}, blobs: {l}'.format(cnt=cnt,
                                                    symbol=symbol,
                                                    l=len(entries)))

        last_close = None
        for entry in entries:
            missing = next(
                (key for key in required_keys if key not in entry), None)
            if missing is not None:
                print(
                    'blob: {blob} does not have all the expected keys, missing key: {key}'
                    .format(blob=str(entry), key=missing))
                continue

            close = entry['close']
            open_ = entry['open']
            high = entry['high']
            low = entry['low']
            volume = entry['volume']

            # No trades (or no close): carry the previous close forward.
            if close is None or volume == 0 or volume == '0':
                close = open_ = high = low = last_close
            if close is None:
                continue

            rows.append(row_template.format(date_str=entry['date'],
                                            close=close,
                                            open=open_,
                                            high=high,
                                            low=low,
                                            volume=volume,
                                            symbol=symbol))
            last_close = close
    return rows
def __init__(self, logs_cache_dir):
    """Store the cache directory and start a request throttler.

    Args:
        logs_cache_dir: Cache directory path; it is stored on the instance
            with a trailing ``'/'`` appended.
    """
    self.logs_cache_dir = logs_cache_dir + '/'
    throttler = BaseThrottler(name='base-throttler', delay=0.2)
    self.throttler = throttler
    # The throttler is started immediately, at construction time.
    throttler.start()
import logging import os import re import requests from requests_throttler import BaseThrottler FOOD_PLAN_INDEX = '''http://www.cnpp.usda.gov/USDAFoodPlansCostofFood/reports?field_publication_type_tid=953&field_publication_date_value[value]&page={page_no}''' ORIGIN_BASE_ADDRESS = '''http://origin.www.cnpp.usda.gov/''' ORIGIN_TABLE_ADDRESS = '''http://origin.www.cnpp.usda.gov/USDAFoodCost-Home.htm''' COF_REPORT_NAME_REGEX = re.compile(r'/(CostofFood\w\w\w(\d){2,4}\.pdf)\b') SCRIPT_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) PDF_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, 'pdfs') THROTTLER = BaseThrottler(name='cof-report-throttler', delay=10.0) class Report: '''A container for report name and link that overrides __hash__ and __eq__ for sets to remove reports of the same name''' def __init__(self, name, link): self.name = name self.link = link def __hash__(self): return hash(self.name) def __eq__(self, other): return self.name == other.name def __repr__(self):
def _run_requests_return_rows(request_list):
    """Run daily open/close requests through a throttler and format CSV rows.

    Every request is submitted to a ``BaseThrottler`` (``delay=0.04``). Each
    usable response yields at most one row. The row's close column is taken
    from the response's ``afterHours`` field.

    A response is skipped when it is falsy, when its status code is not 200,
    when its JSON ``status`` is neither ``'OK'`` nor ``'success'``, when any
    expected key is missing, or when the close value is below 1.0 or above
    10000.

    Args:
        request_list: Requests handed to ``BaseThrottler.multi_submit``.

    Returns:
        A list of newline-terminated strings in the column order
        ``date,close,open,high,low,volume,symbol``.

    Raises:
        ValueError: If ``from`` does not match ``%Y-%m-%dT%H:%M:%SZ`` or the
            close value cannot be converted to a float.
    """
    # 'symbol' is read below as well, so it is validated with the other keys
    # instead of raising KeyError on an incomplete payload.
    keys = ['open', 'afterHours', 'high', 'low', 'volume', 'from', 'symbol']

    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for res in responses:
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        js = res.json()
        if 'status' not in js or js['status'] not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        missing_key = next((k for k in keys if k not in js), None)
        if missing_key is not None:
            # Report the payload itself; the previous code formatted an
            # undefined name here and raised NameError instead of skipping.
            print(
                'blob: {blob} does not have all the expected keys, missing key: {key}'
                .format(blob=str(js), key=missing_key))
            continue

        symbol = js['symbol']
        close, open_, high, low, volume = (
            js['afterHours'], js['open'], js['high'], js['low'], js['volume'])
        print('{symbol}'.format(symbol=symbol))

        # Drop rows whose close is outside the accepted price range.
        close_v = float(close)
        if close_v < 1.0 or close_v > 10000:
            continue

        date_str = datetime.datetime.strptime(
            js['from'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")
        rows.append(
            '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
            format(date_str=date_str,
                   close=close,
                   open=open_,
                   high=high,
                   low=low,
                   volume=volume,
                   symbol=symbol))
    return rows