示例#1
0
def load_market_data(bm_symbol='^GSPC'):
    """
    Load benchmark returns and treasury curves, refreshing stale caches.

    Both data sets are cached as CSV files at the paths returned by
    ``get_data_filepath``.  A file that cannot be opened is regenerated
    with the matching ``dump_*`` function and read again; a file whose last
    row lags the trading calendar is refreshed.

    Parameters
    ----------
    bm_symbol : str, optional
        Benchmark symbol.  It also selects the treasury curve entry in
        ``INDEX_MAPPING``; symbols without an entry use the ``'^GSPC'`` one.

    Returns
    -------
    benchmark_returns
        The cached benchmark series, or the result of ``update_benchmarks``
        when the cache was stale, with its index in UTC.
    tr_curves : OrderedDict
        Maps every index label of the treasury curve frame to that row
        converted with ``to_dict()``, ordered by label.
    """
    bm_filepath = get_data_filepath(get_benchmark_filename(bm_symbol))
    try:
        saved_benchmarks = pd.Series.from_csv(bm_filepath)
    except (OSError, IOError):
        print("""
data files aren't distributed with source.
Fetching data from Yahoo Finance.
""".strip())
        dump_benchmarks(bm_symbol)
        saved_benchmarks = pd.Series.from_csv(bm_filepath)

    saved_benchmarks = saved_benchmarks.tz_localize('UTC')

    # Trading days up to (and including) the insertion point of
    # "now minus one trading day".
    most_recent = pd.Timestamp('today', tz='UTC') - trading_day
    most_recent_index = trading_days.searchsorted(most_recent)
    days_up_to_now = trading_days[:most_recent_index + 1]

    # Find the offset of the last date for which we have trading data in our
    # list of valid trading days.
    last_bm_date = saved_benchmarks.index[-1]
    last_bm_date_offset = days_up_to_now.searchsorted(
        last_bm_date.strftime('%Y/%m/%d'))

    # Update only if more than one trading day has elapsed since the last
    # day for which we have data.  The threshold is 2 rather than 1 because
    # an array *index* is subtracted from an array *length*: the difference
    # is already 1 when the data reaches the final day of days_up_to_now.
    if len(days_up_to_now) - last_bm_date_offset > 2:
        benchmark_returns = update_benchmarks(bm_symbol, last_bm_date)
    else:
        benchmark_returns = saved_benchmarks

    # Normalise the index to UTC.  tz_localize only accepts naive indexes
    # (it raises TypeError on tz-aware ones), so aware non-UTC data must be
    # converted instead.
    bm_tz = benchmark_returns.index.tz
    if bm_tz is None:
        benchmark_returns = benchmark_returns.tz_localize('UTC')
    elif bm_tz.zone != 'UTC':
        benchmark_returns = benchmark_returns.tz_convert('UTC')

    # Get treasury curve module, filename & source from mapping.
    # Default to USA.
    module, filename, source = INDEX_MAPPING.get(
        bm_symbol, INDEX_MAPPING['^GSPC'])

    tr_filepath = get_data_filepath(filename)
    try:
        saved_curves = pd.DataFrame.from_csv(tr_filepath)
    except (OSError, IOError):
        print("""
data files aren't distributed with source.
Fetching data from {0}
""".format(source).strip())
        dump_treasury_curves(module, filename)
        saved_curves = pd.DataFrame.from_csv(tr_filepath)

    # Same staleness test as for the benchmark data (threshold explained
    # above).
    last_tr_date = saved_curves.index[-1]
    last_tr_date_offset = days_up_to_now.searchsorted(
        last_tr_date.strftime('%Y/%m/%d'))

    if len(days_up_to_now) - last_tr_date_offset > 2:
        # NOTE(review): the return value of dump_treasury_curves is used
        # as-is; presumably it is already UTC-indexed - confirm.
        treasury_curves = dump_treasury_curves(module, filename)
    else:
        treasury_curves = saved_curves.tz_localize('UTC')

    # Columns of the transposed frame are the rows of the original, so this
    # yields one (label, row) pair per curve date.
    tr_curves = {}
    for tr_dt, curve in treasury_curves.T.iteritems():
        tr_curves[tr_dt] = curve.to_dict()

    tr_curves = OrderedDict(
        sorted(iteritems(tr_curves), key=lambda t: t[0]))

    return benchmark_returns, tr_curves
示例#2
0
def load_market_data(bm_symbol='^GSPC'):
    """
    Return synthetic benchmark returns and treasury curves.

    This variant still attempts to read the cached CSV files and prints the
    usual notice when one cannot be opened, but whatever was read is
    discarded: the benchmark series is a constant 0.00002 and every treasury
    curve column is a constant 0.0001, both indexed by the trading days from
    2000-01-01 onwards.

    Parameters
    ----------
    bm_symbol : str, optional
        Benchmark symbol; selects the benchmark file name and the
        ``INDEX_MAPPING`` entry (falling back to ``'^GSPC'``).

    Returns
    -------
    benchmark_returns
        The synthetic series, or the result of ``update_benchmarks`` if the
        staleness test fires, localized to UTC when not already UTC.
    tr_curves : OrderedDict
        Maps each index label of the curve frame to that row as a dict,
        ordered by label.
    """
    def _localize_unless_utc(data):
        # Pass data through untouched when its index is already tagged UTC.
        index_tz = data.index.tz
        if index_tz is not None and index_tz.zone == 'UTC':
            return data
        return data.tz_localize('UTC')

    stub_days = trading_days[trading_days >= "2000-01-01"]

    benchmark_csv = get_data_filepath(get_benchmark_filename(bm_symbol))
    try:
        # The parsed result is never used; only the read attempt (and the
        # notice on failure) is kept.
        pd.Series.from_csv(benchmark_csv)
    except (OSError, IOError):
        print("""
data files aren't distributed with source.
Fetching data from Yahoo Finance.
""".strip())

    flat_returns = pd.Series(data=0.00002, index=stub_days)
    saved_benchmarks = flat_returns.tz_localize(None).tz_localize('UTC')

    # Trading days up to the insertion point of "now minus one trading day".
    one_day_back = pd.Timestamp('today', tz='UTC') - trading_day
    cutoff = trading_days.searchsorted(one_day_back)
    elapsed_days = trading_days[:cutoff + 1]

    # Position of the newest benchmark date within the elapsed trading days.
    newest_bm_date = saved_benchmarks.index[-1]
    bm_position = elapsed_days.searchsorted(
        newest_bm_date.strftime('%Y/%m/%d'))

    # Refresh when the data lags the calendar by more than one entry.
    if len(elapsed_days) - bm_position > 1:
        benchmark_returns = update_benchmarks(bm_symbol, newest_bm_date)
    else:
        benchmark_returns = saved_benchmarks
    benchmark_returns = _localize_unless_utc(benchmark_returns)

    # Treasury curve module, file name and source; USA is the default.
    module, filename, source = INDEX_MAPPING.get(bm_symbol,
                                                 INDEX_MAPPING['^GSPC'])

    curves_csv = get_data_filepath(filename)
    try:
        # As above: the file content is not used.
        pd.DataFrame.from_csv(curves_csv)
    except (OSError, IOError):
        print("""
data files aren't distributed with source.
Fetching data from {0}
""".format(source).strip())

    curve_columns = [
        '10year', '1month', '1year', '20year',
        '2year', '30year', '3month', '3year',
        '5year', '6month', '7year', 'date', 'tid'
    ]
    saved_curves = pd.DataFrame(
        0.0001, index=stub_days, columns=curve_columns).tz_localize(None)

    # Position of the newest curve date within the elapsed trading days.
    newest_tr_date = saved_curves.index[-1]
    tr_position = elapsed_days.searchsorted(
        newest_tr_date.strftime('%Y/%m/%d'))

    if len(elapsed_days) - tr_position > 1:
        treasury_curves = dump_treasury_curves(module, filename)
    else:
        treasury_curves = saved_curves.tz_localize('UTC')

    # One dict per curve date (columns of the transpose are the rows).
    rows_by_date = {
        curve_dt: row.to_dict()
        for curve_dt, row in treasury_curves.T.iteritems()
    }
    tr_curves = OrderedDict(
        sorted(iteritems(rows_by_date), key=lambda pair: pair[0]))

    return benchmark_returns, tr_curves
示例#3
0
def load_market_data(bm_symbol='^GSPC'):
    """
    Load benchmark returns and treasury curves from the cached CSV files.

    Each file is opened through ``get_datafile``; when that raises
    ``IOError`` the matching ``dump_*`` function is called and the file is
    opened again.  Data whose last row lags ``trading_days`` is refreshed.

    Parameters
    ----------
    bm_symbol : str, optional
        Benchmark symbol used to locate and update the benchmark file.

    Returns
    -------
    benchmark_returns
        The cached benchmark series, or the result of ``update_benchmarks``
        when the cache was stale, with its index in UTC.
    tr_curves : OrderedDict
        Maps every index label of the treasury curve frame to that row
        converted with ``to_dict()``, ordered by label.
    """
    try:
        fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb")
    except IOError:
        # strip() belongs to the message: print() returns None when it is a
        # function, so calling .strip() on its result raises AttributeError.
        print("""
data files aren't distributed with source.
Fetching data from Yahoo Finance.
""".strip())
        dump_benchmarks(bm_symbol)
        fp_bm = get_datafile(get_benchmark_filename(bm_symbol), "rb")

    # Close the handle even if parsing fails.
    try:
        saved_benchmarks = pd.Series.from_csv(fp_bm)
    finally:
        fp_bm.close()
    saved_benchmarks = saved_benchmarks.tz_localize('UTC')

    # Find the offset of the last date for which we have trading data in our
    # list of valid trading days.
    last_bm_date = saved_benchmarks.index[-1]
    last_bm_date_offset = trading_days.searchsorted(
        last_bm_date.strftime('%Y/%m/%d'))

    # If more than 1 trading day has elapsed since the last day where
    # we have data, then we need to update.
    # NOTE(review): this compares against the whole of trading_days; if that
    # calendar extends past today the update runs on every call - confirm.
    if len(trading_days) - last_bm_date_offset > 1:
        benchmark_returns = update_benchmarks(bm_symbol, last_bm_date)
    else:
        benchmark_returns = saved_benchmarks

    # Normalise the index to UTC.  A naive index is localized; an aware
    # non-UTC index is converted, because tz_localize raises TypeError on
    # tz-aware data.
    bm_tz = benchmark_returns.index.tz
    if bm_tz is None:
        benchmark_returns = benchmark_returns.tz_localize('UTC')
    elif bm_tz.zone != 'UTC':
        benchmark_returns = benchmark_returns.tz_convert('UTC')

    try:
        fp_tr = get_datafile('treasury_curves.csv', "rb")
    except IOError:
        print("""
data files aren't distributed with source.
Fetching data from data.treasury.gov
""".strip())
        dump_treasury_curves()
        fp_tr = get_datafile('treasury_curves.csv', "rb")

    # Read everything and release the handle before a possible refresh
    # below regenerates the treasury data.
    try:
        saved_curves = pd.DataFrame.from_csv(fp_tr)
    finally:
        fp_tr.close()

    # Same staleness test as for the benchmark data.
    last_tr_date = saved_curves.index[-1]
    last_tr_date_offset = trading_days.searchsorted(
        last_tr_date.strftime('%Y/%m/%d'))

    if len(trading_days) - last_tr_date_offset > 1:
        treasury_curves = dump_treasury_curves()
    else:
        treasury_curves = saved_curves.tz_localize('UTC')

    # Columns of the transposed frame are the rows of the original, so this
    # yields one (label, row) pair per curve date.
    tr_curves = {}
    for tr_dt, curve in treasury_curves.T.iteritems():
        tr_curves[tr_dt] = curve.to_dict()

    # dict.items() works on both Python 2 and 3 (dict.iteritems does not).
    tr_curves = OrderedDict(
        sorted(tr_curves.items(), key=lambda t: t[0]))

    return benchmark_returns, tr_curves
示例#4
0
def load_market_data(bm_symbol='^GSPC'):
    """
    Load benchmark returns and treasury curves from msgpack files.

    Each file is opened through ``get_datafile``; when that raises
    ``IOError`` the matching ``dump_*`` function is called and the file is
    opened again.  Data whose last entry lags ``trading_days`` is updated on
    disk and then re-read.

    Parameters
    ----------
    bm_symbol : str, optional
        Benchmark symbol used to locate and update the benchmark file.

    Returns
    -------
    bm_returns : list
        One ``DailyReturn`` per entry of the benchmark file, sorted by
        ``date``.
    tr_curves : OrderedDict
        Maps each curve date (via ``tuple_to_date``) to the curve stored for
        it, ordered by date.
    """
    def _unpack_and_close(fp):
        # Decode the whole payload; always release the handle, so that a
        # later update or re-open never races an open descriptor.
        try:
            return msgpack.loads(fp.read())
        finally:
            fp.close()

    bm_filename = get_benchmark_filename(bm_symbol)
    try:
        fp_bm = get_datafile(bm_filename, "rb")
    except IOError:
        # strip() belongs to the message: print() returns None when it is a
        # function, so calling .strip() on its result raises AttributeError.
        print("""
data msgpacks aren't distributed with source.
Fetching data from Yahoo Finance.
""".strip())
        dump_benchmarks(bm_symbol)
        fp_bm = get_datafile(bm_filename, "rb")

    bm_list = _unpack_and_close(fp_bm)

    # Find the offset of the last date for which we have trading data in our
    # list of valid trading days.
    last_bm_date = tuple_to_date(bm_list[-1][0])
    last_bm_date_offset = trading_days.searchsorted(
        last_bm_date.strftime('%Y/%m/%d'))

    # If more than 1 trading day has elapsed since the last day where
    # we have data, then we need to update and read the file again.
    if len(trading_days) - last_bm_date_offset > 1:
        update_benchmarks(bm_symbol, last_bm_date)
        bm_list = _unpack_and_close(get_datafile(bm_filename, "rb"))

    bm_returns = sorted(
        (DailyReturn(date=tuple_to_date(packed_date), returns=returns)
         for packed_date, returns in bm_list),
        key=attrgetter('date'))

    try:
        fp_tr = get_datafile('treasury_curves.msgpack', "rb")
    except IOError:
        print("""
data msgpacks aren't distributed with source.
Fetching data from data.treasury.gov
""".strip())
        dump_treasury_curves()
        fp_tr = get_datafile('treasury_curves.msgpack', "rb")

    tr_list = _unpack_and_close(fp_tr)

    # Same staleness test as for the benchmark data.
    last_tr_date = tuple_to_date(tr_list[-1][0])
    last_tr_date_offset = trading_days.searchsorted(
        last_tr_date.strftime('%Y/%m/%d'))

    if len(trading_days) - last_tr_date_offset > 1:
        update_treasury_curves(last_tr_date)
        tr_list = _unpack_and_close(
            get_datafile('treasury_curves.msgpack', "rb"))

    tr_curves = {}
    for packed_date, curve in tr_list:
        tr_curves[tuple_to_date(packed_date)] = curve

    # dict.items() works on both Python 2 and 3 (dict.iteritems does not).
    tr_curves = OrderedDict(
        sorted(tr_curves.items(), key=lambda t: t[0]))

    return bm_returns, tr_curves
示例#5
0
文件: loader.py 项目: syneric/zipline
def load_market_data(bm_symbol='^GSPC'):
    """
    Load benchmark returns and treasury curves, refreshing stale caches.

    Both data sets are cached as CSV files at the paths returned by
    ``get_data_filepath``.  A file that cannot be opened is regenerated with
    the matching ``dump_*`` function and read again; a file whose last row
    lags the trading calendar is refreshed.

    Parameters
    ----------
    bm_symbol : str, optional
        Benchmark symbol.  It also selects the treasury curve entry in
        ``INDEX_MAPPING``; symbols without an entry use the ``'^GSPC'`` one.

    Returns
    -------
    benchmark_returns
        The cached benchmark series, or the result of ``update_benchmarks``
        when the cache was stale, localized to UTC when not already UTC.
    tr_curves : OrderedDict
        Maps every index label of the treasury curve frame to that row
        converted with ``to_dict()``, ordered by label.
    """
    def _localize_unless_utc(data):
        # Pass data through untouched when its index is already tagged UTC.
        index_tz = data.index.tz
        if index_tz is not None and index_tz.zone == 'UTC':
            return data
        return data.tz_localize('UTC')

    def _is_stale(last_date):
        # An array index is subtracted from an array length, so the
        # difference is already 1 when last_date is the final known day;
        # "more than one trading day behind" therefore means "> 2".
        position = known_days.searchsorted(last_date.strftime('%Y/%m/%d'))
        return len(known_days) - position > 2

    benchmark_csv = get_data_filepath(get_benchmark_filename(bm_symbol))
    try:
        cached_returns = pd.Series.from_csv(benchmark_csv)
    except (OSError, IOError):
        print("""
data files aren't distributed with source.
Fetching data from Yahoo Finance.
""".strip())
        dump_benchmarks(bm_symbol)
        cached_returns = pd.Series.from_csv(benchmark_csv)
    cached_returns = cached_returns.tz_localize('UTC')

    # Trading days up to the insertion point of "now minus one trading day".
    one_day_back = pd.Timestamp('today', tz='UTC') - trading_day
    cutoff = trading_days.searchsorted(one_day_back)
    known_days = trading_days[:cutoff + 1]

    newest_bm_date = cached_returns.index[-1]
    if _is_stale(newest_bm_date):
        benchmark_returns = update_benchmarks(bm_symbol, newest_bm_date)
    else:
        benchmark_returns = cached_returns
    benchmark_returns = _localize_unless_utc(benchmark_returns)

    # Treasury curve module, file name and source; USA is the default.
    module, filename, source = INDEX_MAPPING.get(bm_symbol,
                                                 INDEX_MAPPING['^GSPC'])

    curves_csv = get_data_filepath(filename)
    try:
        cached_curves = pd.DataFrame.from_csv(curves_csv)
    except (OSError, IOError):
        print("""
data files aren't distributed with source.
Fetching data from {0}
""".format(source).strip())
        dump_treasury_curves(module, filename)
        cached_curves = pd.DataFrame.from_csv(curves_csv)

    if _is_stale(cached_curves.index[-1]):
        treasury_curves = dump_treasury_curves(module, filename)
    else:
        treasury_curves = cached_curves.tz_localize('UTC')

    # One dict per curve date (columns of the transpose are the rows).
    rows_by_date = {
        curve_dt: row.to_dict()
        for curve_dt, row in treasury_curves.T.iteritems()
    }
    tr_curves = OrderedDict(
        sorted(iteritems(rows_by_date), key=lambda pair: pair[0]))

    return benchmark_returns, tr_curves