Exemplo n.º 1
0
def download_histories_csv(date_str):
    """Fetch intraday bars for ``date_str`` and write them to a CSV file.

    The requests returned by ``_get_requests(date_str)`` are sent through a
    ``BaseThrottler``.  Once the throttler has finished, every usable
    response is flattened into ``date,time,close,open,high,low,volume,symbol``
    rows.  The output file is opened in ``'w'`` mode, so it is overwritten on
    each run.

    Responses that are falsy, not HTTP 200, lack a ``results`` key, or carry
    an empty ``results`` value are skipped.
    """
    pending = _get_requests(date_str)

    throttler = BaseThrottler(name='base-throttler', delay=0.04)
    throttler.start()
    submitted = throttler.multi_submit(pending)

    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')
    responses = [item.response for item in submitted]

    row_format = '{date_str},{time_str},{close},{open},{high},{low},{volume},{symbol}\n'
    target_path = 'data/intraday/us.intraday.polygon.history.csv'

    with open(target_path, 'w') as outfile:
        outfile.write('date,time,close,open,high,low,volume,symbol\n')

        for position, response in enumerate(responses):
            if not response:
                print('The response is invalid: %s' % (response))
                continue
            if response.status_code != 200:
                continue

            payload = response.json()
            if 'results' not in payload:
                print('The response does not have results: %s' % (payload))
                continue

            bars = payload['results']
            if not bars:
                continue

            symbol = payload['ticker']
            print('{cnt}th {symbol}, blobs: {l}'.format(cnt=position,
                                                        symbol=symbol,
                                                        l=len(bars)))

            # Collect all rows of one response first so that a malformed bar
            # aborts before anything from this response reaches the file.
            lines = []
            for bar in bars:
                # presumably 't' is an epoch in milliseconds — TODO confirm
                seconds = int(bar['t']) // 1000
                stamp = datetime.datetime.fromtimestamp(seconds).astimezone(
                    _TZ_US_EAST)
                lines.append(
                    row_format.format(date_str=stamp.strftime('%Y-%m-%d'),
                                      time_str=stamp.strftime('%H:%M:%S'),
                                      close=bar['c'],
                                      open=bar['o'],
                                      high=bar['h'],
                                      low=bar['l'],
                                      volume=bar['v'],
                                      symbol=symbol))
            outfile.writelines(lines)
Exemplo n.º 2
0
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build daily CSV rows.

    Args:
        request_list: Requests accepted by ``BaseThrottler.multi_submit``.

    Returns:
        A list of newline-terminated strings in the form
        ``date,close,open,high,low,volume,symbol``, one per entry of the
        ``tickers`` array of every usable response.  Responses that are
        falsy, not HTTP 200, without an ``OK``/``success`` status, or
        without a ``tickers`` key are skipped.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.5)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for cnt, res in enumerate(responses):
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            continue

        js = res.json()
        if 'status' not in js or js['status'] not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        if 'tickers' not in js:
            print('The response does not have results: %s' % (js))
            continue

        for ticker in js['tickers']:
            symbol = ticker['ticker']
            print('{cnt}th {symbol}'.format(cnt=cnt, symbol=symbol))

            daily = ticker['day']
            close, open_, high, low, volume = (daily['c'], daily['o'],
                                               daily['h'], daily['l'],
                                               daily['v'])

            # NOTE(review): this key looks like a sample timestamp value
            # rather than a field name — confirm which field of the ticker
            # entry actually carries the epoch.
            epoch = int(ticker['1547787608999']) // 1000

            # Build the aware datetime directly from the epoch.  Calling
            # ``localize`` on a naive ``fromtimestamp`` result would label the
            # machine's local wall-clock time with the target zone without
            # converting it, giving a wrong date on hosts in another zone.
            t = datetime.datetime.fromtimestamp(epoch, tz=_TZ_US_EAST)
            date_str = t.strftime('%Y-%m-%d')
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str,
                       close=close,
                       open=open_,
                       high=high,
                       low=low,
                       volume=volume,
                       symbol=symbol))

    return rows
Exemplo n.º 3
0
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build daily CSV rows.

    Args:
        request_list: Requests accepted by ``BaseThrottler.multi_submit``.

    Returns:
        A list of newline-terminated strings in the form
        ``date,close,open,high,low,volume,symbol``, one per entry of
        ``js['dataset']['data']`` for every usable response.  Responses that
        are falsy, not HTTP 200, decode to an empty JSON value, or lack the
        ``dataset`` / ``data`` keys are skipped with a message.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.1)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    rows = []
    for response in responses:
        if not response:
            print('The response is invalid: %s' % (response))
            continue

        if response.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=response.status_code))
            continue

        js = response.json()
        if not js:
            print('The response is invalid: %s' % (js))
            continue

        if 'dataset' not in js:
            print('The response does not have dataset: %s' % (js))
            continue

        if 'data' not in js['dataset']:
            print('The response data does not have data: %s' % (js))
            continue

        symbol = js['dataset']['dataset_code']
        for data_for_date in js['dataset']['data']:
            # Each entry is read positionally: index 0 is used as the date,
            # 1-3 as open/high/low, 4 as close and 5 as volume.
            date_str = data_for_date[0]
            open_, high, low, close, volume = (data_for_date[1],
                                               data_for_date[2],
                                               data_for_date[3],
                                               data_for_date[4],
                                               data_for_date[5])
            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str,
                       close=close,
                       open=open_,
                       high=high,
                       low=low,
                       volume=volume,
                       symbol=symbol))
    return rows
Exemplo n.º 4
0
def main():
    """Send ``n_reqs`` identical GET requests through a throttler.

    The URL, request count and delay come from ``parse_args()``.  Each
    response is printed, followed by the throttler's success and failure
    counters.
    """
    args = parse_args()

    session = requests.Session()
    session.headers.update({'user-agent': 'test-user-agent'})

    bt = BaseThrottler(name='base-throttler',
                       delay=args['delay'],
                       session=session)

    # The request body only carries a 1-based sequence label.
    reqs = [
        requests.Request(method='GET',
                         url=args['url'],
                         data='Request - ' + str(index + 1))
        for index in range(0, args['n_reqs'])
    ]

    # The throttler is used as a context manager around the submission.
    with bt:
        throttled_requests = bt.multi_submit(reqs)

    for throttled in throttled_requests:
        print(throttled.response)

    summary = "Success: {s}, Failures: {f}".format(s=bt.successes,
                                                   f=bt.failures)
    print(summary)
Exemplo n.º 5
0
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build daily CSV rows.

    The symbol of each response is taken from the URL of the request at the
    same position in ``request_list`` (the path segment after ``stock/``).
    The JSON body is iterated as a sequence of blobs, each expected to have
    ``date``, ``close``, ``open``, ``high``, ``low`` and ``volume`` keys.

    A blob with zero volume or a ``None`` close reuses the previous close for
    all four prices; if there is no previous close the blob is dropped.

    Returns:
        A list of newline-terminated
        ``date,close,open,high,low,volume,symbol`` strings.
    """
    throttler = BaseThrottler(name='base-throttler', delay=0.04)
    throttler.start()
    submitted = throttler.multi_submit(request_list)

    print('shutting down the throttler')
    throttler.shutdown()
    print('waiting for the requests to be done')
    throttler.wait_end()
    print('run_done')
    responses = [item.response for item in submitted]

    required_keys = ['date', 'close', 'open', 'high', 'low', 'volume']
    rows = []

    for position, response in enumerate(responses):
        if not response:
            print('The response is invalid: %s' % (response))
            continue

        if response.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=response.status_code))
            continue

        payload = response.json()

        # Responses carry no symbol here; recover it from the request URL.
        request = request_list[position]
        match = re.search(r'stock/([^/]+)', request.url)
        if not match or not match.groups():
            continue
        symbol = match.groups()[0]

        if not payload:
            continue

        print('{cnt}th {symbol}, blobs: {l}'.format(cnt=position,
                                                    symbol=symbol,
                                                    l=len(payload)))

        last_close = None
        for blob in payload:
            missing = next((key for key in required_keys if key not in blob),
                           None)
            if missing is not None:
                print(
                    'blob: {blob} does not have all the expected keys, missing key: {key}'
                    .format(blob=str(blob), key=missing))
                continue

            date_str = blob['date']
            close = blob['close']
            open_ = blob['open']
            high = blob['high']
            low = blob['low']
            volume = blob['volume']

            # No trading (or no close): carry the previous close forward.
            if volume == '0' or volume == 0 or close is None:
                close = open_ = high = low = last_close

            if close is None:
                continue

            rows.append(
                '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
                format(date_str=date_str,
                       close=close,
                       open=open_,
                       high=high,
                       low=low,
                       volume=volume,
                       symbol=symbol))
            last_close = close

    return rows
Exemplo n.º 6
0
 def __init__(self, logs_cache_dir):
     """Store the cache directory and start a request throttler.

     ``logs_cache_dir`` is stored with a trailing ``'/'`` appended.  A
     ``BaseThrottler`` is created and started before the constructor
     returns.
     """
     self.logs_cache_dir = logs_cache_dir + '/'
     throttler = BaseThrottler(delay=0.2, name='base-throttler')
     self.throttler = throttler
     throttler.start()
Exemplo n.º 7
0
import logging
import os
import re
import requests
from requests_throttler import BaseThrottler

# URL template of the paginated report index; ``{page_no}`` is the page.
FOOD_PLAN_INDEX = (
    'http://www.cnpp.usda.gov/USDAFoodPlansCostofFood/reports'
    '?field_publication_type_tid=953'
    '&field_publication_date_value[value]'
    '&page={page_no}'
)
ORIGIN_BASE_ADDRESS = 'http://origin.www.cnpp.usda.gov/'
ORIGIN_TABLE_ADDRESS = 'http://origin.www.cnpp.usda.gov/USDAFoodCost-Home.htm'

# Captures a report file name of the form ``CostofFood<3 chars><2-4 digits>.pdf``
# that follows a slash.
COF_REPORT_NAME_REGEX = re.compile(r'/(CostofFood\w\w\w(\d){2,4}\.pdf)\b')

# The ``pdfs`` directory sits next to this script.
SCRIPT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
PDF_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, 'pdfs')

# Module-wide throttler (delay presumably in seconds — TODO confirm).
THROTTLER = BaseThrottler(delay=10.0, name='cof-report-throttler')

class Report:
    '''A container for report name and link that overrides __hash__ and
    __eq__ for sets to remove reports of the same name'''
    def __init__(self, name, link):
        self.name = name
        self.link = link

    def __hash__(self):
        '''Hash on the report name only, so the link does not affect set
        membership.'''
        return hash(self.name)

    def __eq__(self, other):
        return self.name == other.name

    def __repr__(self):
Exemplo n.º 8
0
def _run_requests_return_rows(request_list):
    """Run ``request_list`` through a throttler and build daily CSV rows.

    Each usable response is expected to be a single JSON object with
    ``status``, ``symbol``, ``open``, ``afterHours``, ``high``, ``low``,
    ``volume`` and ``from`` keys.  The ``afterHours`` value is written as the
    close price.

    Args:
        request_list: Requests accepted by ``BaseThrottler.multi_submit``.

    Returns:
        A list of newline-terminated
        ``date,close,open,high,low,volume,symbol`` strings.  Responses that
        are falsy, not HTTP 200, without an ``OK``/``success`` status,
        missing an expected key, or whose close is non-numeric or outside
        ``[1.0, 10000]`` are skipped.
    """
    bt = BaseThrottler(name='base-throttler', delay=0.04)
    bt.start()
    throttled_requests = bt.multi_submit(request_list)

    print('shutting down the throttler')
    bt.shutdown()
    print('waiting for the requests to be done')
    bt.wait_end()
    print('run_done')
    responses = [tr.response for tr in throttled_requests]

    keys = ['open', 'afterHours', 'high', 'low', 'volume', 'from']

    rows = []
    for res in responses:
        if not res:
            print('The response is invalid: %s' % (res))
            continue

        if res.status_code != 200:
            print('response status code is not 200 OK: {code}'.format(
                code=res.status_code))
            continue

        js = res.json()

        if 'status' not in js or js['status'] not in ('OK', 'success'):
            print('The response does not have proper status: %s' % (js))
            continue

        missing_key = next((k for k in keys if k not in js), None)
        if missing_key is not None:
            # Report the payload itself; the previous message referenced an
            # undefined name and raised NameError instead of skipping.
            print(
                'blob: {blob} does not have all the expected keys, missing key: {key}'
                .format(blob=str(js), key=missing_key))
            continue

        symbol = js['symbol']

        close, open_, high, low, volume = (js['afterHours'], js['open'],
                                           js['high'], js['low'],
                                           js['volume'])
        print('{symbol}'.format(symbol=symbol))

        # A null or non-numeric close must not abort the whole batch.
        try:
            close_v = float(close)
        except (TypeError, ValueError):
            print('The close price is not numeric: %s' % (close,))
            continue

        # Drop prices outside the accepted range.
        if close_v < 1.0 or close_v > 10000:
            continue

        date_str = datetime.datetime.strptime(
            js['from'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")

        rows.append(
            '{date_str},{close},{open},{high},{low},{volume},{symbol}\n'.
            format(date_str=date_str,
                   close=close,
                   open=open_,
                   high=high,
                   low=low,
                   volume=volume,
                   symbol=symbol))

    return rows