def download_histories_csv(date_str):
    """Fetch intraday bars for ``date_str`` and dump them into one CSV file.

    The requests produced by ``_get_requests(date_str)`` are pushed through a
    ``BaseThrottler``; once all of them have finished, every usable response
    is flattened into ``date,time,close,open,high,low,volume,symbol`` rows and
    written to ``data/intraday/us.intraday.polygon.history.csv`` (the file is
    overwritten on each call).

    Args:
        date_str: value handed unchanged to ``_get_requests``.

    Returns:
        None. The result is the CSV file; progress is printed to stdout.
    """
    filename = 'data/intraday/us.intraday.polygon.history.csv'
    row_template = (
        '{date_str},{time_str},{close},{open},{high},{low},{volume},{symbol}\n')

    def format_bar(bar, symbol):
        # 't' is divided by 1000 before use, i.e. it is treated as epoch
        # milliseconds; the bar is then expressed in the _TZ_US_EAST zone.
        epoch = int(bar['t']) // 1000
        stamp = datetime.datetime.fromtimestamp(epoch).astimezone(_TZ_US_EAST)
        bar_date = stamp.strftime('%Y-%m-%d')
        bar_time = stamp.strftime('%H:%M:%S')
        close, open_, high, low, volume = (
            bar['c'], bar['o'], bar['h'], bar['l'], bar['v'])
        return row_template.format(
            date_str=bar_date, time_str=bar_time, close=close, open=open_,
            high=high, low=low, volume=volume, symbol=symbol)

    request_list = _get_requests(date_str)
    # Debugging aid: slice request_list (e.g. request_list[:10]) for a dry run.

    throttler = BaseThrottler(name='base-throttler', delay=0.04)
    throttler.start()
    throttled_requests = throttler.multi_submit(request_list)
    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')

    responses = [throttled.response for throttled in throttled_requests]

    with open(filename, 'w') as outfile:
        outfile.write('date,time,close,open,high,low,volume,symbol\n')
        for cnt, res in enumerate(responses):
            if not res:
                print('The response is invalid: %s' % (res))
                continue
            if res.status_code != 200:
                # Non-200 answers are dropped without a message.
                continue

            payload = res.json()
            if 'results' not in payload:
                print('The response does not have results: %s' % (payload))
                continue
            bars = payload['results']
            if not bars:
                continue

            symbol = payload['ticker']
            print('{cnt}th {symbol}, blobs: {l}'.format(
                cnt=cnt, symbol=symbol, l=len(bars)))

            # Build the whole symbol first so a malformed bar never leaves a
            # half-written symbol in the file.
            out_lines = [format_bar(bar, symbol) for bar in bars]
            outfile.writelines(out_lines)
def _run_requests_return_rows(request_list):
    """Run snapshot requests through a throttler and build daily CSV rows.

    Every request is submitted to a ``BaseThrottler`` (0.5 s delay). Each
    successful response must carry a ``status`` of ``OK``/``success`` and a
    ``tickers`` list; one row is produced per ticker entry from its ``day``
    bar (``c``, ``o``, ``h``, ``l``, ``v``).

    Args:
        request_list: requests accepted by ``BaseThrottler.multi_submit``.

    Returns:
        list of str: newline-terminated
        ``date,close,open,high,low,volume,symbol`` lines, with the date taken
        from the ticker's update timestamp expressed in ``_TZ_US_EAST``.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.5)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')

    responses = [tr.response for tr in throttled_requests]
    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            continue

        js = res.json()
        if 'status' not in js or js['status'] not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue
        if 'tickers' not in js:
            print('The response does not have results: %s' % (js))
            continue

        for ticker in js['tickers']:
            symbol = ticker['ticker']
            print('{cnt}th {symbol}'.format(cnt=cnt, symbol=symbol))
            daily = ticker['day']
            close, open_, high, low, volume = (
                daily['c'], daily['o'], daily['h'], daily['l'], daily['v'])

            # The previous code looked the timestamp up under the literal key
            # '1547787608999', which looks like a sample *value* pasted in
            # place of the field name. Read 'updated' first and keep the old
            # literal only as a fallback so nothing that used to work breaks.
            # NOTE(review): field name taken from the provider's snapshot
            # documentation - confirm against a live payload.
            updated = ticker.get('updated', ticker.get('1547787608999'))
            if updated is None:
                print('The ticker does not have an update timestamp: %s' %
                      (symbol))
                continue

            # The value is treated as epoch milliseconds (// 1000 -> seconds).
            epoch = int(updated) // 1000
            # Convert the instant into US/Eastern. localize() on a naive
            # local-time datetime merely relabels the host's wall clock and
            # yields the wrong date on machines outside that zone.
            t = datetime.datetime.fromtimestamp(
                epoch, tz=datetime.timezone.utc).astimezone(_TZ_US_EAST)
            date_str = t.strftime('%Y-%m-%d')
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str, close=close, open=open_, high=high,
                       low=low, volume=volume, symbol=symbol))
    return rows
def _run_requests_return_rows(request_list):
    """Execute the throttled requests and flatten their datasets into rows.

    Each usable JSON response is expected to look like
    ``{'dataset': {'dataset_code': ..., 'data': [[...], ...]}}``; every entry
    of ``data`` is read positionally as
    ``[date, open, high, low, close, volume, ...]``.

    Args:
        request_list: requests accepted by ``BaseThrottler.multi_submit``.

    Returns:
        list of str: newline-terminated
        ``date,close,open,high,low,volume,symbol`` lines.
    """
    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'

    throttler = BaseThrottler(name='base-throttler', delay=0.1)
    throttler.start()
    throttled_requests = throttler.multi_submit(request_list)
    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')

    rows = []
    for throttled in throttled_requests:
        response = throttled.response
        if not response:
            print('The response is invalid: %s' % (response))
            continue
        if response.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=response.status_code))
            continue

        payload = response.json()
        if not payload:
            print('The response is invalid: %s' % (payload))
            continue
        if 'dataset' not in payload:
            print('The response does not have dataset: %s' % (payload))
            continue
        dataset = payload['dataset']
        if 'data' not in dataset:
            print('The response data does not have data: %s' % (payload))
            continue

        symbol = dataset['dataset_code']
        # Column positions: 0=date, 1=open, 2=high, 3=low, 4=close, 5=volume.
        rows.extend([
            row_template.format(
                date_str=entry[0], close=entry[4], open=entry[1],
                high=entry[2], low=entry[3], volume=entry[5], symbol=symbol)
            for entry in dataset['data']
        ])
    return rows
def _run_requests_return_rows(request_list):
    """Execute the throttled requests and turn their JSON bars into CSV rows.

    The symbol is not read from the payload; it is recovered from the request
    URL (the path segment following ``stock/``). Bars that report zero volume
    or no close reuse the previous bar's close for all four prices, and are
    dropped when no previous close exists yet.

    Args:
        request_list: requests accepted by ``BaseThrottler.multi_submit``;
            each one must expose a ``url`` attribute.

    Returns:
        list of str: newline-terminated
        ``date,close,open,high,low,volume,symbol`` lines.
    """
    symbol_pattern = re.compile(r'stock/([^/]+)')
    expected_keys = ('date', 'close', 'open', 'high', 'low', 'volume')
    row_template = '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'

    throttler = BaseThrottler(name='base-throttler', delay=0.04)
    throttler.start()
    throttled_requests = throttler.multi_submit(request_list)
    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')

    responses = [throttled.response for throttled in throttled_requests]
    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        bars = res.json()
        # Responses are matched back to their request by position.
        request = request_list[cnt]
        match = symbol_pattern.search(request.url)
        if not match or not match.groups():
            continue
        symbol = match.groups()[0]
        if not bars:
            continue
        print('{cnt}th {symbol}, blobs: {l}'.format(
            cnt=cnt, symbol=symbol, l=len(bars)))

        last_close = None
        for bar in bars:
            # Only the first missing key is reported; the bar is then skipped.
            missing = next((k for k in expected_keys if k not in bar), None)
            if missing is not None:
                print(
                    'blob: {blob} does not have all the expected keys, missing key: {key}'
                    .format(blob=str(bar), key=missing))
                continue

            date_str = bar['date']
            close, open_, high, low, volume = (
                bar['close'], bar['open'], bar['high'], bar['low'],
                bar['volume'])
            if volume in ('0', 0) or close is None:
                # No trading information: carry the previous close forward.
                close = open_ = high = low = last_close
            if close is None:
                continue

            rows.append(row_template.format(
                date_str=date_str, close=close, open=open_, high=high,
                low=low, volume=volume, symbol=symbol))
            last_close = close
    return rows
def _run_requests_return_rows(request_list):
    """Run open/close requests through a throttler and build daily CSV rows.

    Each successful response is a single JSON object that must carry a
    ``status`` of ``OK``/``success`` plus the keys ``symbol``, ``open``,
    ``afterHours``, ``high``, ``low``, ``volume`` and ``from``. The
    ``afterHours`` value is used as the close. Responses whose close is not
    numeric, or lies outside ``[1.0, 10000]``, are skipped.

    Args:
        request_list: requests accepted by ``BaseThrottler.multi_submit``.

    Returns:
        list of str: newline-terminated
        ``date,close,open,high,low,volume,symbol`` lines.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)
    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')

    # 'symbol' is read below as well, so validate it with the other keys
    # instead of letting one odd payload abort the whole batch.
    required_keys = ('symbol', 'open', 'afterHours', 'high', 'low', 'volume',
                     'from')

    responses = [tr.response for tr in throttled_requests]
    rows = []
    for res in responses:
        if not res:
            print('The response is invalid: %s' % (res))
            continue
        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        js = res.json()
        if 'status' not in js or js['status'] not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        missing_key = next((k for k in required_keys if k not in js), None)
        if missing_key is not None:
            # The old message formatted an undefined name (`blob`) and raised
            # NameError exactly when a key was missing; report `js` instead.
            print(
                'blob: {blob} does not have all the expected keys, missing key: {key}'
                .format(blob=str(js), key=missing_key))
            continue

        symbol = js['symbol']
        close, open_, high, low, volume = (
            js['afterHours'], js['open'], js['high'], js['low'], js['volume'])
        print('{symbol}'.format(symbol=symbol))

        try:
            close_v = float(close)
        except (TypeError, ValueError):
            # A present-but-null or non-numeric close cannot be range checked.
            print('The response does not have a numeric close: %s' % (js,))
            continue
        # Discard prices outside the accepted range.
        if close_v < 1.0 or close_v > 10000:
            continue

        date_str = datetime.datetime.strptime(
            js['from'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")
        rows.append(
            '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
            format(date_str=date_str, close=close, open=open_, high=high,
                   low=low, volume=volume, symbol=symbol))
    return rows